diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 60b1aeda..78ae7843 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -218,7 +218,7 @@ import Control.DeepSeq (NFData(rnf)) #if defined(ASSERTS) import Control.Exception (assert) #endif -import Data.Bits ((.&.), shiftR, shiftL) +import Data.Bits ((.&.)) import qualified Data.Char as Char import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex, Constr, mkConstr, DataType, mkDataType, Fixity(Prefix)) @@ -231,20 +231,20 @@ import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) import Data.Text.Internal.Reverse (reverse) -import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr2, chr3, chr4, ord2, ord3, ord4) +import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr3, ord2, ord3, ord4) import qualified Data.Text.Internal.Fusion as S -import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping) import qualified Data.Text.Internal.Fusion.Common as S import Data.Text.Encoding (decodeUtf8', encodeUtf8) import Data.Text.Internal.Fusion (stream, reverseStream, unstream) import Data.Text.Internal.Private (span_) import Data.Text.Internal (Text(..), empty, firstf, mul, safe, text, append, pack) -import Data.Text.Internal.Unsafe.Char (unsafeWrite, unsafeChr8) +import Data.Text.Internal.Unsafe.Char (unsafeWrite) import Data.Text.Show (singleton, unpack, unpackCString#, unpackCStringAscii#) import qualified Prelude as P import Data.Text.Unsafe (Iter(..), iter, iter_, lengthWord8, reverseIter, reverseIter_, unsafeHead, unsafeTail, iterArray, reverseIterArray) import Data.Text.Internal.Search (indices) +import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_) #if defined(__HADDOCK__) import Data.ByteString (ByteString) import qualified Data.Text.Lazy as L @@ -253,7 +253,7 @@ import Data.Word (Word8) import Foreign.C.Types import GHC.Base (eqInt, neInt, gtInt, geInt, ltInt, leInt, ByteArray#) import qualified GHC.Exts as Exts -import GHC.Int (Int8, Int64(..)) +import GHC.Int (Int8) import GHC.Stack (HasCallStack) import qualified Language.Haskell.TH.Lib as TH import qualified Language.Haskell.TH.Syntax as TH @@ -418,10 +418,14 @@ instance TH.Lift Text where #if MIN_VERSION_template_haskell(2,16,0) lift txt = do let (ptr, len) = unsafePerformIO $ asForeignPtr txt - bytesQ = TH.litE . TH.bytesPrimL $ TH.mkBytes ptr 0 (P.fromIntegral len) - lenQ = liftInt (P.fromIntegral len) - liftInt n = (TH.appE (TH.conE 'Exts.I#) (TH.litE (TH.IntPrimL n))) - TH.varE 'unpackCStringLen# `TH.appE` bytesQ `TH.appE` lenQ + case len of + 0 -> TH.varE 'empty + _ -> + let + bytesQ = TH.litE . TH.bytesPrimL $ TH.mkBytes ptr 0 (P.fromIntegral len) + lenQ = liftInt (P.fromIntegral len) + liftInt n = (TH.appE (TH.conE 'Exts.I#) (TH.litE (TH.IntPrimL n))) + in TH.varE 'unpackCStringLen# `TH.appE` bytesQ `TH.appE` lenQ #else lift = TH.appE (TH.varE 'pack) . TH.stringE . unpack #endif @@ -499,7 +503,7 @@ uncons t@(Text arr off len) -- non-empty. This is a partial function, consider using 'unsnoc' instead. last :: HasCallStack => Text -> Char last t@(Text _ _ len) - | len <= 0 = emptyError "last" + | null t = emptyError "last" | otherwise = let Iter c _ = reverseIter t (len - 1) in c {-# INLINE [1] last #-} @@ -507,7 +511,7 @@ last t@(Text _ _ len) -- must be non-empty. This is a partial function, consider using 'uncons' instead. tail :: HasCallStack => Text -> Text tail t@(Text arr off len) - | len <= 0 = emptyError "tail" + | null t = emptyError "tail" | otherwise = text arr (off+d) (len-d) where d = iter_ t 0 {-# INLINE [1] tail #-} @@ -516,7 +520,7 @@ tail t@(Text arr off len) -- be non-empty. This is a partial function, consider using 'unsnoc' instead. init :: HasCallStack => Text -> Text init t@(Text arr off len) - | len <= 0 = emptyError "init" + | null t = emptyError "init" | otherwise = text arr off (len + reverseIter_ t (len - 1)) {-# INLINE [1] init #-} @@ -526,7 +530,7 @@ init t@(Text arr off len) -- @since 1.2.3.0 unsnoc :: Text -> Maybe (Text, Char) unsnoc t@(Text arr off len) - | len <= 0 = Nothing + | null t = Nothing | otherwise = Just (text arr off (len + d), c) where Iter c d = reverseIter t (len - 1) @@ -541,6 +545,10 @@ null (Text _arr _off len) = len <= 0 {-# INLINE [1] null #-} +{-# RULES + "TEXT null/empty -> True" null empty = True +#-} + -- | /O(1)/ Tests whether a 'Text' contains exactly one character. isSingleton :: Text -> Bool isSingleton = S.isSingleton . stream @@ -576,6 +584,8 @@ length = P.negate . measureOff P.maxBound length (intersperse c t) = max 0 (mul 2 (length t) - 1) "TEXT length/intercalate -> n*length" forall s ts. length (intercalate s ts) = let lenS = length s in max 0 (P.sum (P.map (\t -> length t + lenS) ts) - lenS) +"TEXT length/empty -> 0" + length empty = 0 #-} -- | /O(min(n,c))/ Compare the count of characters in a 'Text' to a number. @@ -639,28 +649,7 @@ compareLength t c = S.compareLengthI (stream t) c -- -- Performs replacement on invalid scalar values. map :: (Char -> Char) -> Text -> Text -map f = go - where - go (Text src o l) = runST $ do - marr <- A.new (l + 4) - outer marr (l + 4) o 0 - where - outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text - outer !dst !dstLen = inner - where - inner !srcOff !dstOff - | srcOff >= l + o = do - A.shrinkM dst dstOff - arr <- A.unsafeFreeze dst - return (Text arr 0 dstOff) - | dstOff + 4 > dstLen = do - let !dstLen' = dstLen + (l + o) - srcOff + 4 - dst' <- A.resizeM dst dstLen' - outer dst' dstLen' srcOff dstOff - | otherwise = do - let !(Iter c d) = iterArray src srcOff - d' <- unsafeWrite dst dstOff (safe (f c)) - inner (srcOff + d) (dstOff + d') +map f = \t -> if null t then empty else mapNonEmpty f t {-# INLINE [1] map #-} {-# RULES @@ -690,7 +679,7 @@ intercalate t = concat . L.intersperse t -- -- Performs replacement on invalid scalar values. intersperse :: Char -> Text -> Text -intersperse c t@(Text src o l) = if l == 0 then mempty else runST $ do +intersperse c t@(Text src o l) = if null t then empty else runST $ do let !cLen = utf8Length c dstLen = l + length t P.* cLen @@ -784,9 +773,9 @@ replace needle@(Text _ _ neeLen) (Text repArr repOff repLen) haystack@(Text hayArr hayOff hayLen) | neeLen == 0 = emptyError "replace" + | len == 0 = empty -- if also haystack is empty, we can't just return 'haystack' as worker/wrapper might duplicate it | L.null ixs = haystack - | len > 0 = Text (A.run x) 0 len - | otherwise = empty + | otherwise = Text (A.run x) 0 len where ixs = indices needle haystack len = hayLen - (neeLen - repLen) `mul` L.length ixs @@ -823,84 +812,6 @@ replace needle@(Text _ _ neeLen) -- sensitivity should use appropriate versions of the -- . -caseConvert :: (Word8 -> Word8) -> (Exts.Char# -> _ {- unboxed Int64 -}) -> Text -> Text -caseConvert ascii remap (Text src o l) = runST $ do - -- Case conversion a single code point may produce up to 3 code-points, - -- each up to 4 bytes, so 12 in total. - dst <- A.new (l + 12) - outer dst l o 0 - where - outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text - outer !dst !dstLen = inner - where - inner !srcOff !dstOff - | srcOff >= o + l = do - A.shrinkM dst dstOff - arr <- A.unsafeFreeze dst - return (Text arr 0 dstOff) - | dstOff + 12 > dstLen = do - -- Ensure to extend the buffer by at least 12 bytes. - let !dstLen' = dstLen + max 12 (l + o - srcOff) - dst' <- A.resizeM dst dstLen' - outer dst' dstLen' srcOff dstOff - -- If a character is to remain unchanged, no need to decode Char back into UTF8, - -- just copy bytes from input. - | otherwise = do - let m0 = A.unsafeIndex src srcOff - m1 = A.unsafeIndex src (srcOff + 1) - m2 = A.unsafeIndex src (srcOff + 2) - m3 = A.unsafeIndex src (srcOff + 3) - !d = utf8LengthByLeader m0 - case d of - 1 -> do - A.unsafeWrite dst dstOff (ascii m0) - inner (srcOff + 1) (dstOff + 1) - 2 -> do - let !(Exts.C# c) = chr2 m0 m1 - dstOff' <- case I64# (remap c) of - 0 -> do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - pure $ dstOff + 2 - i -> writeMapping i dstOff - inner (srcOff + 2) dstOff' - 3 -> do - let !(Exts.C# c) = chr3 m0 m1 m2 - dstOff' <- case I64# (remap c) of - 0 -> do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - pure $ dstOff + 3 - i -> writeMapping i dstOff - inner (srcOff + 3) dstOff' - _ -> do - let !(Exts.C# c) = chr4 m0 m1 m2 m3 - dstOff' <- case I64# (remap c) of - 0 -> do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - A.unsafeWrite dst (dstOff + 3) m3 - pure $ dstOff + 4 - i -> writeMapping i dstOff - inner (srcOff + 4) dstOff' - - writeMapping :: Int64 -> Int -> ST s Int - writeMapping 0 dstOff = pure dstOff - writeMapping i dstOff = do - let (ch, j) = chopOffChar i - d <- unsafeWrite dst dstOff ch - writeMapping j (dstOff + d) - - chopOffChar :: Int64 -> (Char, Int64) - chopOffChar ab = (chr a, ab `shiftR` 21) - where - chr (Exts.I# n) = Exts.C# (Exts.chr# n) - mask = (1 `shiftL` 21) - 1 - a = P.fromIntegral $ ab .&. mask -{-# INLINE caseConvert #-} - -- | /O(n)/ Convert a string to folded case. -- -- This function is mainly useful for performing caseless (also known @@ -918,7 +829,9 @@ caseConvert ascii remap (Text src o l) = runST $ do -- U+00B5) is case folded to \"μ\" (small letter mu, U+03BC) -- instead of itself. toCaseFold :: Text -> Text -toCaseFold = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) foldMapping xs +toCaseFold = \t -> + if null t then empty + else toCaseFoldNonEmpty t {-# INLINE toCaseFold #-} -- | /O(n)/ Convert a string to lower case, using simple case @@ -929,7 +842,9 @@ toCaseFold = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) foldM -- U+0130) maps to the sequence \"i\" (Latin small letter i, U+0069) -- followed by \" ̇\" (combining dot above, U+0307). toLower :: Text -> Text -toLower = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) lowerMapping xs +toLower = \t -> + if null t then empty + else toLowerNonEmpty t {-# INLINE toLower #-} -- | /O(n)/ Convert a string to upper case, using simple case @@ -939,7 +854,9 @@ toLower = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) lowerMap -- instance, the German \"ß\" (eszett, U+00DF) maps to the -- two-letter sequence \"SS\". toUpper :: Text -> Text -toUpper = \xs -> caseConvert (\w -> if w - 97 <= 25 then w - 32 else w) upperMapping xs +toUpper = \t -> + if null t then empty + else toUpperNonEmpty t {-# INLINE toUpper #-} -- | /O(n)/ Convert a string to title case, using simple case @@ -1370,8 +1287,10 @@ unfoldrN n f s = unstream (S.unfoldrN n (firstf safe . f) s) take :: Int -> Text -> Text take n t@(Text arr off len) | n <= 0 = empty - | n >= len = t - | otherwise = let m = measureOff n t in if m >= 0 then text arr off m else t + | n >= len || m >= len || m < 0 = t + | otherwise = Text arr off m + where + m = measureOff n t {-# INLINE [1] take #-} -- | /O(n)/ If @t@ is long enough to contain @n@ characters, 'measureOff' @n@ @t@ @@ -1423,8 +1342,8 @@ iterNEnd n t@(Text _arr _off len) = loop (len-1) n drop :: Int -> Text -> Text drop n t@(Text arr off len) | n <= 0 = t - | n >= len = empty - | otherwise = if m >= 0 then text arr (off+m) (len-m) else mempty + | n >= len || m >= len || m < 0 = empty + | otherwise = Text arr (off+m) (len-m) where m = measureOff n t {-# INLINE [1] drop #-} @@ -1532,9 +1451,10 @@ strip = dropAround Char.isSpace splitAt :: Int -> Text -> (Text, Text) splitAt n t@(Text arr off len) | n <= 0 = (empty, t) - | n >= len = (t, empty) - | otherwise = let m = measureOff n t in - if m >= 0 then (text arr off m, text arr (off+m) (len-m)) else (t, mempty) + | n >= len || m >= len || m < 0 = (t, empty) + | otherwise = (Text arr off m, Text arr (off+m) (len-m)) + where + m = measureOff n t -- | /O(n)/ 'span', applied to a predicate @p@ and text @t@, returns -- a pair whose first element is the longest prefix (possibly empty) @@ -1632,9 +1552,11 @@ group = groupBy (==) -- | /O(n)/ Return all initial segments of the given 'Text', shortest -- first. inits :: Text -> [Text] -inits t@(Text arr off len) = loop 0 - where loop i | i >= len = [t] - | otherwise = Text arr off i : loop (i + iter_ t i) +inits t = empty : case t of + Text arr off len -> + let loop i | i >= len = [] + | otherwise = let !j = i + iter_ t i in Text arr off j : loop j + in loop 0 -- | /O(n)/ Return all final segments of the given 'Text', longest -- first. @@ -1704,8 +1626,9 @@ splitOn pat@(Text _ _ l) src@(Text arr off len) -- >>> split (=='a') "" -- [""] split :: (Char -> Bool) -> Text -> [Text] -split _ t@(Text _off _arr 0) = [t] -split p t = loop t +split p t + | null t = [empty] + | otherwise = loop t where loop s | null s' = [l] | otherwise = l : loop (unsafeTail s') where (# l, s' #) = span_ (not . p) s @@ -1761,67 +1684,7 @@ partition p t = (filter p t, filter (not . p) t) -- returns a 'Text' containing those characters that satisfy the -- predicate. filter :: (Char -> Bool) -> Text -> Text -filter p = go - where - go (Text src o l) = runST $ do - -- It's tempting to allocate l elements at once and avoid resizing. - -- However, this can be unacceptable in scenarios where a huge array - -- is filtered with a rare predicate, resulting in a much shorter buffer. - let !dstLen = min l 64 - dst <- A.new dstLen - outer dst dstLen o 0 - where - outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text - outer !dst !dstLen = inner - where - inner !srcOff !dstOff - | srcOff >= o + l = do - A.shrinkM dst dstOff - arr <- A.unsafeFreeze dst - return (Text arr 0 dstOff) - | dstOff + 4 > dstLen = do - -- Double size of the buffer, unless it becomes longer than - -- source string. Ensure to extend it by least 4 bytes. - let !dstLen' = dstLen + max 4 (min (l + o - srcOff) dstLen) - dst' <- A.resizeM dst dstLen' - outer dst' dstLen' srcOff dstOff - -- In case of success, filter writes exactly the same character - -- it just read (this is not a case for map, for example). - -- We leverage this fact below: no need to decode Char back into UTF8, - -- just copy bytes from input. - | otherwise = do - let m0 = A.unsafeIndex src srcOff - m1 = A.unsafeIndex src (srcOff + 1) - m2 = A.unsafeIndex src (srcOff + 2) - m3 = A.unsafeIndex src (srcOff + 3) - !d = utf8LengthByLeader m0 - case d of - 1 -> do - let !c = unsafeChr8 m0 - if not (p c) then inner (srcOff + 1) dstOff else do - A.unsafeWrite dst dstOff m0 - inner (srcOff + 1) (dstOff + 1) - 2 -> do - let !c = chr2 m0 m1 - if not (p c) then inner (srcOff + 2) dstOff else do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - inner (srcOff + 2) (dstOff + 2) - 3 -> do - let !c = chr3 m0 m1 m2 - if not (p c) then inner (srcOff + 3) dstOff else do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - inner (srcOff + 3) (dstOff + 3) - _ -> do - let !c = chr4 m0 m1 m2 m3 - if not (p c) then inner (srcOff + 4) dstOff else do - A.unsafeWrite dst dstOff m0 - A.unsafeWrite dst (dstOff + 1) m1 - A.unsafeWrite dst (dstOff + 2) m2 - A.unsafeWrite dst (dstOff + 3) m3 - inner (srcOff + 4) (dstOff + 4) +filter p = filter_ text p {-# INLINE [1] filter #-} {-# RULES @@ -2205,7 +2068,9 @@ overflowError fun = P.error $ "Data.Text." ++ fun ++ ": size overflow" -- copy \"breaks the link\" to the original array, allowing it to be -- garbage collected if there are no other live references to it. copy :: Text -> Text -copy (Text arr off len) = Text (A.run go) 0 len +copy t@(Text arr off len) + | null t = empty + | otherwise = Text (A.run go) 0 len where go :: ST s (A.MArray s) go = do diff --git a/src/Data/Text/Foreign.hs b/src/Data/Text/Foreign.hs index 975e777c..82f010e7 100644 --- a/src/Data/Text/Foreign.hs +++ b/src/Data/Text/Foreign.hs @@ -71,6 +71,7 @@ newtype I8 = I8 Int fromPtr :: Ptr Word8 -- ^ source array -> I8 -- ^ length of source array (in 'Word8' units) -> IO Text +fromPtr _ (I8 0) = pure empty fromPtr ptr (I8 len) = unsafeSTToIO $ do dst <- A.new len A.copyFromPointer dst 0 ptr len diff --git a/src/Data/Text/Internal.hs b/src/Data/Text/Internal.hs index 3fc271ea..4ccde712 100644 --- a/src/Data/Text/Internal.hs +++ b/src/Data/Text/Internal.hs @@ -36,7 +36,6 @@ module Data.Text.Internal , safe -- * Code that must be here for accessibility , empty - , empty_ , append -- * Utilities , firstf @@ -90,12 +89,7 @@ text_ arr off len = -- | /O(1)/ The empty 'Text'. empty :: Text empty = Text A.empty 0 0 -{-# INLINE [1] empty #-} - --- | A non-inlined version of 'empty'. -empty_ :: Text -empty_ = Text A.empty 0 0 -{-# NOINLINE empty_ #-} +{-# NOINLINE empty #-} -- | /O(n)/ Appends one 'Text' to the other by copying both of them -- into a new 'Text'. @@ -117,6 +111,7 @@ append a@(Text arr1 off1 len1) b@(Text arr2 off2 len2) -- | Construct a 'Text' without invisibly pinning its byte array in -- memory if its length has dwindled to zero. +-- It ensures that empty 'Text' values are shared. text :: #if defined(ASSERTS) HasCallStack => @@ -127,7 +122,7 @@ text :: -> Text text arr off len | len == 0 = empty | otherwise = text_ arr off len -{-# INLINE text #-} +{-# INLINE [0] text #-} textP :: A.Array -> Int -> Int -> Text {-# DEPRECATED textP "Use text instead" #-} @@ -247,6 +242,7 @@ int64ToInt32 = fromIntegral -- >>> Data.Text.unpack (pack "\55555") -- "\65533" pack :: String -> Text +pack [] = empty pack xs = runST $ do -- It's tempting to allocate a buffer of 4 * length xs bytes, -- but not only it's wasteful for predominantly ASCII arguments, diff --git a/src/Data/Text/Internal/Lazy.hs b/src/Data/Text/Internal/Lazy.hs index 928e5cfc..74c7ba90 100644 --- a/src/Data/Text/Internal/Lazy.hs +++ b/src/Data/Text/Internal/Lazy.hs @@ -46,6 +46,7 @@ import Data.Typeable (Typeable) import Foreign.Storable (sizeOf) import qualified Data.Text.Array as A import qualified Data.Text.Internal as T +import qualified Data.Text as T data Text = Empty | Chunk {-# UNPACK #-} !T.Text Text @@ -82,9 +83,16 @@ showStructure (Chunk t ts) = -- | Smart constructor for 'Chunk'. Guarantees the data type invariant. chunk :: T.Text -> Text -> Text -{-# INLINE chunk #-} -chunk t@(T.Text _ _ len) ts | len == 0 = ts - | otherwise = Chunk t ts +{-# INLINE [0] chunk #-} +chunk t ts | T.null t = ts + | otherwise = Chunk t ts + +{-# RULES +"TEXT chunk/text" forall arr off len. + chunk (T.text arr off len) = chunk (T.Text arr off len) +"TEXT chunk/empty" forall ts. + chunk T.empty ts = ts +#-} -- | Smart constructor for 'Empty'. empty :: Text diff --git a/src/Data/Text/Internal/Reverse.hs b/src/Data/Text/Internal/Reverse.hs index f3b91b46..0759acac 100644 --- a/src/Data/Text/Internal/Reverse.hs +++ b/src/Data/Text/Internal/Reverse.hs @@ -11,7 +11,7 @@ {-# OPTIONS_GHC -ddump-to-file -ddump-simpl -dsuppress-all -dno-suppress-type-signatures #-} -- | Implements 'reverse', using efficient C routines by default. -module Data.Text.Internal.Reverse (reverse) where +module Data.Text.Internal.Reverse (reverse, reverseNonEmpty) where #if !defined(PURE_HASKELL) import GHC.Exts as Exts @@ -25,7 +25,7 @@ import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader) import GHC.Stack (HasCallStack) #endif import Prelude hiding (reverse) -import Data.Text.Internal (Text(..)) +import Data.Text.Internal (Text(..), empty) import Control.Monad.ST (runST) import qualified Data.Text.Array as A @@ -41,8 +41,16 @@ reverse :: HasCallStack => #endif Text -> Text +reverse (Text _ _ 0) = empty +reverse t = reverseNonEmpty t +{-# INLINE reverse #-} + +-- | /O(n)/ Reverse the characters of a string. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +reverseNonEmpty :: + Text -> Text #if defined(PURE_HASKELL) -reverse (Text src off len) = runST $ do +reverseNonEmtpy (Text src off len) = runST $ do dest <- A.new len _ <- reversePoints src off dest len result <- A.unsafeFreeze dest @@ -82,13 +90,13 @@ reversePoints src xx dest yy = go xx yy where A.copyI pLen dest yNext src x go (x + pLen) yNext #else -reverse (Text (A.ByteArray ba) off len) = runST $ do +reverseNonEmpty (Text (A.ByteArray ba) off len) = runST $ do marr@(A.MutableByteArray mba) <- A.new len unsafeIOToST $ c_reverse mba ba (fromIntegral off) (fromIntegral len) brr <- A.unsafeFreeze marr return $ Text brr 0 len #endif -{-# INLINE reverse #-} +{-# INLINE reverseNonEmpty #-} #if !defined(PURE_HASKELL) -- | The input buffer (src :: ByteArray#, off :: CSize, len :: CSize) diff --git a/src/Data/Text/Internal/Transformation.hs b/src/Data/Text/Internal/Transformation.hs new file mode 100644 index 00000000..026c3272 --- /dev/null +++ b/src/Data/Text/Internal/Transformation.hs @@ -0,0 +1,266 @@ +{-# LANGUAGE BangPatterns, CPP, MagicHash #-} +{-# LANGUAGE Trustworthy #-} +{-# LANGUAGE UnliftedFFITypes #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE PartialTypeSignatures #-} + +{-# OPTIONS_GHC -fno-warn-orphans #-} +{-# OPTIONS_GHC -Wno-partial-type-signatures #-} + +-- | +-- Module : Data.Text.Internal.Transformation +-- Copyright : (c) 2008, 2009 Tom Harper, +-- (c) 2009, 2010 Bryan O'Sullivan, +-- (c) 2009 Duncan Coutts +-- +-- License : BSD-style +-- Maintainer : bos@serpentine.com +-- Stability : experimental +-- Portability : GHC +-- +-- This module holds functions shared between the strict and lazy implementations of @Text@ transformations. + +module Data.Text.Internal.Transformation + ( mapNonEmpty + , reverseNonEmpty + , toCaseFoldNonEmpty + , toLowerNonEmpty + , toUpperNonEmpty + , filter_ + ) where + +import Prelude (Char, Bool(..), Int, + Ord(..), + Monad(..), pure, + (+), (-), ($), + not, return, otherwise, IO) +#if defined(ASSERTS) +import Control.Exception (assert) +#endif +import Data.Bits ((.&.), shiftR, shiftL) +import Control.Monad.ST (ST, runST) +import Control.Monad.ST.Unsafe (unsafeIOToST) +import qualified Data.Text.Array as A +import Data.Text.Internal.Encoding.Utf8 (utf8LengthByLeader, chr2, chr3, chr4) +import Data.Text.Internal.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping) +import Data.Text.Internal (Text(..), safe) +import Data.Text.Internal.Unsafe.Char (unsafeWrite, unsafeChr8) +import qualified Prelude as P +import Data.Text.Unsafe (Iter(..), iterArray) +import Data.Word (Word8) +import Foreign.C.Types +import GHC.Base (ByteArray#) +import qualified GHC.Exts as Exts +import GHC.Int (Int64(..)) + +-- | /O(n)/ 'map' @f@ @t@ is the 'Text' obtained by applying @f@ to +-- each element of @t@. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +mapNonEmpty :: (Char -> Char) -> Text -> Text +mapNonEmpty f = go + where + go (Text src o l) = runST $ do + marr <- A.new (l + 4) + outer marr (l + 4) o 0 + where + outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text + outer !dst !dstLen = inner + where + inner !srcOff !dstOff + | srcOff >= l + o = do + A.shrinkM dst dstOff + arr <- A.unsafeFreeze dst + return (Text arr 0 dstOff) + | dstOff + 4 > dstLen = do + let !dstLen' = dstLen + (l + o) - srcOff + 4 + dst' <- A.resizeM dst dstLen' + outer dst' dstLen' srcOff dstOff + | otherwise = do + let !(Iter c d) = iterArray src srcOff + d' <- unsafeWrite dst dstOff (safe (f c)) + inner (srcOff + d) (dstOff + d') +{-# INLINE mapNonEmpty #-} + +-- | /O(n)/ Reverse the characters of a string. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +reverseNonEmpty :: + Text -> Text +reverseNonEmpty (Text (A.ByteArray ba) off len) = runST $ do + marr@(A.MutableByteArray mba) <- A.new len + unsafeIOToST $ c_reverse mba ba (intToCSize off) (intToCSize len) + brr <- A.unsafeFreeze marr + return $ Text brr 0 len +{-# INLINE reverseNonEmpty #-} + +-- | The input buffer (src :: ByteArray#, off :: CSize, len :: CSize) +-- must specify a valid UTF-8 sequence, this condition is not checked. +foreign import ccall unsafe "_hs_text_reverse" c_reverse + :: Exts.MutableByteArray# s -> ByteArray# -> CSize -> CSize -> IO () + +intToCSize :: Int -> CSize +intToCSize = P.fromIntegral + +caseConvert :: (Word8 -> Word8) -> (Exts.Char# -> _ {- unboxed Int64 -}) -> Text -> Text +caseConvert ascii remap (Text src o l) = runST $ do + -- Case conversion a single code point may produce up to 3 code-points, + -- each up to 4 bytes, so 12 in total. + dst <- A.new (l + 12) + outer dst l o 0 + where + outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s Text + outer !dst !dstLen = inner + where + inner !srcOff !dstOff + | srcOff >= o + l = do + A.shrinkM dst dstOff + arr <- A.unsafeFreeze dst + return (Text arr 0 dstOff) + | dstOff + 12 > dstLen = do + -- Ensure to extend the buffer by at least 12 bytes. + let !dstLen' = dstLen + max 12 (l + o - srcOff) + dst' <- A.resizeM dst dstLen' + outer dst' dstLen' srcOff dstOff + -- If a character is to remain unchanged, no need to decode Char back into UTF8, + -- just copy bytes from input. + | otherwise = do + let m0 = A.unsafeIndex src srcOff + m1 = A.unsafeIndex src (srcOff + 1) + m2 = A.unsafeIndex src (srcOff + 2) + m3 = A.unsafeIndex src (srcOff + 3) + !d = utf8LengthByLeader m0 + case d of + 1 -> do + A.unsafeWrite dst dstOff (ascii m0) + inner (srcOff + 1) (dstOff + 1) + 2 -> do + let !(Exts.C# c) = chr2 m0 m1 + dstOff' <- case I64# (remap c) of + 0 -> do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + pure $ dstOff + 2 + i -> writeMapping i dstOff + inner (srcOff + 2) dstOff' + 3 -> do + let !(Exts.C# c) = chr3 m0 m1 m2 + dstOff' <- case I64# (remap c) of + 0 -> do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + pure $ dstOff + 3 + i -> writeMapping i dstOff + inner (srcOff + 3) dstOff' + _ -> do + let !(Exts.C# c) = chr4 m0 m1 m2 m3 + dstOff' <- case I64# (remap c) of + 0 -> do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + A.unsafeWrite dst (dstOff + 3) m3 + pure $ dstOff + 4 + i -> writeMapping i dstOff + inner (srcOff + 4) dstOff' + + writeMapping :: Int64 -> Int -> ST s Int + writeMapping 0 dstOff = pure dstOff + writeMapping i dstOff = do + let (ch, j) = chopOffChar i + d <- unsafeWrite dst dstOff ch + writeMapping j (dstOff + d) + + chopOffChar :: Int64 -> (Char, Int64) + chopOffChar ab = (chr a, ab `shiftR` 21) + where + chr (Exts.I# n) = Exts.C# (Exts.chr# n) + mask = (1 `shiftL` 21) - 1 + a = P.fromIntegral $ ab .&. mask +{-# INLINE caseConvert #-} + + +-- | /O(n)/ Convert a string to folded case. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +toCaseFoldNonEmpty :: Text -> Text +toCaseFoldNonEmpty = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) foldMapping xs +{-# INLINE toCaseFoldNonEmpty #-} + +-- | /O(n)/ Convert a string to lower case, using simple case +-- conversion. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +toLowerNonEmpty :: Text -> Text +toLowerNonEmpty = \xs -> caseConvert (\w -> if w - 65 <= 25 then w + 32 else w) lowerMapping xs +{-# INLINE toLowerNonEmpty #-} + +-- | /O(n)/ Convert a string to upper case, using simple case +-- conversion. +-- Assume that the @Text@ is non-empty. The returned @Text@ is guaranteed to be non-empty. +toUpperNonEmpty :: Text -> Text +toUpperNonEmpty = \xs -> caseConvert (\w -> if w - 97 <= 25 then w - 32 else w) upperMapping xs +{-# INLINE toUpperNonEmpty #-} + +-- | /O(n)/ 'filter_', applied to a continuation, a predicate and a @Text@, +-- calls the continuation with the @Text@ containing only the characters satisfying the predicate. +filter_ :: forall a. (A.Array -> Int -> Int -> a) -> (Char -> Bool) -> Text -> a +filter_ mkText p = go + where + go (Text src o l) = runST $ do + -- It's tempting to allocate l elements at once and avoid resizing. + -- However, this can be unacceptable in scenarios where a huge array + -- is filtered with a rare predicate, resulting in a much shorter buffer. + let !dstLen = min l 64 + dst <- A.new dstLen + outer dst dstLen o 0 + where + outer :: forall s. A.MArray s -> Int -> Int -> Int -> ST s a + outer !dst !dstLen = inner + where + inner !srcOff !dstOff + | srcOff >= o + l = do + A.shrinkM dst dstOff + arr <- A.unsafeFreeze dst + return $ mkText arr 0 dstOff + | dstOff + 4 > dstLen = do + -- Double size of the buffer, unless it becomes longer than + -- source string. Ensure to extend it by least 4 bytes. + let !dstLen' = dstLen + max 4 (min (l + o - srcOff) dstLen) + dst' <- A.resizeM dst dstLen' + outer dst' dstLen' srcOff dstOff + -- In case of success, filter writes exactly the same character + -- it just read (this is not a case for map, for example). + -- We leverage this fact below: no need to decode Char back into UTF8, + -- just copy bytes from input. + | otherwise = do + let m0 = A.unsafeIndex src srcOff + m1 = A.unsafeIndex src (srcOff + 1) + m2 = A.unsafeIndex src (srcOff + 2) + m3 = A.unsafeIndex src (srcOff + 3) + !d = utf8LengthByLeader m0 + case d of + 1 -> do + let !c = unsafeChr8 m0 + if not (p c) then inner (srcOff + 1) dstOff else do + A.unsafeWrite dst dstOff m0 + inner (srcOff + 1) (dstOff + 1) + 2 -> do + let !c = chr2 m0 m1 + if not (p c) then inner (srcOff + 2) dstOff else do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + inner (srcOff + 2) (dstOff + 2) + 3 -> do + let !c = chr3 m0 m1 m2 + if not (p c) then inner (srcOff + 3) dstOff else do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + inner (srcOff + 3) (dstOff + 3) + _ -> do + let !c = chr4 m0 m1 m2 m3 + if not (p c) then inner (srcOff + 4) dstOff else do + A.unsafeWrite dst dstOff m0 + A.unsafeWrite dst (dstOff + 1) m1 + A.unsafeWrite dst (dstOff + 2) m2 + A.unsafeWrite dst (dstOff + 3) m3 + inner (srcOff + 4) (dstOff + 4) +{-# INLINE filter_ #-} diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 2195793a..0ba70d93 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -232,6 +232,8 @@ import Data.Text.Internal.Lazy.Fusion (stream, unstream) import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldlChunks, foldrChunks, smallChunkSize, defaultChunkSize, equal) import Data.Text.Internal (firstf, safe, text) +import Data.Text.Internal.Reverse (reverseNonEmpty) +import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_) import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8) import Data.Text.Internal.Lazy.Search (indices) import qualified GHC.CString as GHC @@ -577,7 +579,7 @@ compareLength t c = S.compareLengthI (stream t) c -- each element of @t@. Performs replacement on -- invalid scalar values. map :: (Char -> Char) -> Text -> Text -map f = foldrChunks (Chunk . T.map f) Empty +map f = foldrChunks (Chunk . mapNonEmpty f) Empty {-# INLINE [1] map #-} {-# RULES @@ -663,7 +665,7 @@ reverse :: Text -> Text reverse = rev Empty where rev a Empty = a - rev a (Chunk t ts) = rev (Chunk (T.reverse t) a) ts + rev a (Chunk t ts) = rev (Chunk (reverseNonEmpty t) a) ts -- | /O(m+n)/ Replace every non-overlapping occurrence of @needle@ in -- @haystack@ with @replacement@. @@ -728,7 +730,7 @@ replace s d = intercalate d . splitOn s -- case folded to the Greek small letter letter mu (U+03BC) instead of -- itself. toCaseFold :: Text -> Text -toCaseFold = foldrChunks (\chnk acc -> Chunk (T.toCaseFold chnk) acc) Empty +toCaseFold = foldrChunks (\chnk acc -> Chunk (toCaseFoldNonEmpty chnk) acc) Empty {-# INLINE toCaseFold #-} -- | /O(n)/ Convert a string to lower case, using simple case @@ -739,7 +741,7 @@ toCaseFold = foldrChunks (\chnk acc -> Chunk (T.toCaseFold chnk) acc) Empty -- to the sequence Latin small letter i (U+0069) followed by combining -- dot above (U+0307). toLower :: Text -> Text -toLower = foldrChunks (\chnk acc -> Chunk (T.toLower chnk) acc) Empty +toLower = foldrChunks (\chnk acc -> Chunk (toLowerNonEmpty chnk) acc) Empty {-# INLINE toLower #-} -- | /O(n)/ Convert a string to upper case, using simple case @@ -749,7 +751,7 @@ toLower = foldrChunks (\chnk acc -> Chunk (T.toLower chnk) acc) Empty -- instance, the German eszett (U+00DF) maps to the two-letter -- sequence SS. toUpper :: Text -> Text -toUpper = foldrChunks (\chnk acc -> Chunk (T.toUpper chnk) acc) Empty +toUpper = foldrChunks (\chnk acc -> Chunk (toUpperNonEmpty chnk) acc) Empty {-# INLINE toUpper #-} @@ -1682,7 +1684,7 @@ stripSuffix p t = reverse `fmap` stripPrefix (reverse p) (reverse t) -- returns a 'Text' containing those characters that satisfy the -- predicate. filter :: (Char -> Bool) -> Text -> Text -filter p = foldrChunks (chunk . T.filter p) Empty +filter p = foldrChunks (chunk . filter_ T.Text p) Empty {-# INLINE [1] filter #-} {-# RULES diff --git a/src/Data/Text/Show.hs b/src/Data/Text/Show.hs index 92aef473..f0996914 100644 --- a/src/Data/Text/Show.hs +++ b/src/Data/Text/Show.hs @@ -24,7 +24,7 @@ module Data.Text.Show ) where import Control.Monad.ST (ST, runST) -import Data.Text.Internal (Text(..), empty_, safe, pack) +import Data.Text.Internal (Text(..), empty, safe, pack) import Data.Text.Internal.Encoding.Utf8 (utf8Length) import Data.Text.Internal.Fusion (stream) import Data.Text.Internal.Unsafe.Char (unsafeWrite) @@ -123,7 +123,7 @@ foreign import capi unsafe "string.h strlen" c_strlen :: CString -> CSize pack (GHC.unpackCStringUtf8# a) = unpackCString# a #-} {-# RULES "TEXT empty literal" - pack [] = empty_ #-} + pack [] = empty #-} {-# RULES "TEXT singleton literal" forall a. pack [a] = singleton a #-} diff --git a/tests/Tests.hs b/tests/Tests.hs index 17e132e6..8ff6aca3 100644 --- a/tests/Tests.hs +++ b/tests/Tests.hs @@ -9,6 +9,7 @@ import Test.Tasty (defaultMain, testGroup) import qualified Tests.Lift as Lift import qualified Tests.Properties as Properties import qualified Tests.Regressions as Regressions +import qualified Tests.ShareEmpty as ShareEmpty import qualified Tests.RebindableSyntaxTest as RST main :: IO () @@ -16,5 +17,6 @@ main = defaultMain $ testGroup "All" [ Lift.tests , Properties.tests , Regressions.tests + , ShareEmpty.tests , RST.tests ] diff --git a/tests/Tests/ShareEmpty.hs b/tests/Tests/ShareEmpty.hs new file mode 100644 index 00000000..efeea75f --- /dev/null +++ b/tests/Tests/ShareEmpty.hs @@ -0,0 +1,126 @@ +{-# LANGUAGE MagicHash #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE TemplateHaskell #-} +{-# LANGUAGE BangPatterns #-} + +module Tests.ShareEmpty + ( tests + ) where + +import Control.Exception (evaluate) +import Data.Text +import Language.Haskell.TH.Syntax (lift) +import Test.Tasty.HUnit (testCase, assertFailure, assertEqual) +import Test.Tasty (TestTree, testGroup) +import GHC.Exts +import GHC.Stack +import qualified Data.List as L +import qualified Data.Text as T + + +-- | assert that a text value is represented by the same pointer +-- as the 'empty' value. +assertPtrEqEmpty :: HasCallStack => Text -> IO () +assertPtrEqEmpty t = do + t' <- evaluate t + empty' <- evaluate empty + assertEqual "" empty' t' + case reallyUnsafePtrEquality# empty' t' of + 1# -> pure () + _ -> assertFailure "Pointers are not equal" +{-# NOINLINE assertPtrEqEmpty #-} + +tests :: TestTree +tests = testGroup "empty Text values are shared" + [ testCase "empty = empty" $ assertPtrEqEmpty T.empty + , testCase "pack \"\" = empty" $ assertPtrEqEmpty $ T.pack "" + , testCase "fromString \"\" = empty" $ assertPtrEqEmpty $ fromString "" + , testCase "$(lift \"\") = empty" $ assertPtrEqEmpty $ $(lift (pack "")) + , testCase "tail of a singleton = empty" $ assertPtrEqEmpty $ T.tail "a" + , testCase "init of a singleton = empty" $ assertPtrEqEmpty $ T.init "b" + , testCase "map _ empty = empty" $ assertPtrEqEmpty $ T.map id empty + , testCase "intercalate _ [] = empty" $ assertPtrEqEmpty $ T.intercalate ", " [] + , testCase "intersperse _ empty = empty" $ assertPtrEqEmpty $ T.intersperse ',' "" + , testCase "reverse empty = empty" $ assertPtrEqEmpty $ + T.reverse empty + , testCase "replace _ _ empty = empty" $ assertPtrEqEmpty $ + T.replace "needle" "replacement" empty + , testCase "toCaseFold empty = empty" $ assertPtrEqEmpty $ T.toCaseFold "" + , testCase "toLower empty = empty" $ assertPtrEqEmpty $ T.toLower "" + , testCase "toUpper empty = empty" $ assertPtrEqEmpty $ T.toUpper "" + , testCase "toTitle empty = empty" $ assertPtrEqEmpty $ T.toTitle "" + , testCase "justifyLeft 0 _ empty = empty" $ assertPtrEqEmpty $ + justifyLeft 0 ' ' empty + , testCase "justifyRight 0 _ empty = empty" $ assertPtrEqEmpty $ + justifyRight 0 ' ' empty + , testCase "center 0 _ empty = empty" $ assertPtrEqEmpty $ + T.center 0 ' ' empty + , testCase "transpose [empty] = [empty]" $ mapM_ assertPtrEqEmpty $ + T.transpose [empty] + , testCase "concat [] = empty" $ assertPtrEqEmpty $ T.concat [] + , testCase "concat [empty] = empty" $ assertPtrEqEmpty $ T.concat [empty] + , testCase "replicate 0 _ = empty" $ assertPtrEqEmpty $ T.replicate 0 "x" + , testCase "replicate _ empty = empty" $ assertPtrEqEmpty $ T.replicate 10 empty + , testCase "unfoldr (const Nothing) _ = empty" $ assertPtrEqEmpty $ + T.unfoldr (const Nothing) () + , testCase "take 0 _ = empty" $ assertPtrEqEmpty $ + T.take 0 "xyz" + , testCase "takeEnd 0 _ = empty" $ assertPtrEqEmpty $ + T.takeEnd 0 "xyz" + , testCase "takeWhile (const False) _ = empty" $ assertPtrEqEmpty $ + T.takeWhile (const False) "xyz" + , testCase "takeWhileEnd (const False) _ = empty" $ assertPtrEqEmpty $ + T.takeWhileEnd (const False) "xyz" + , testCase "drop n x = empty where n > len x" $ assertPtrEqEmpty $ + T.drop 5 "xyz" + , testCase "dropEnd n x = empty where n > len x" $ assertPtrEqEmpty $ + T.dropEnd 5 "xyz" + , testCase "dropWhile (const True) x = empty" $ assertPtrEqEmpty $ + T.dropWhile (const True) "xyz" + , testCase "dropWhileEnd (const True) x = empty" $ assertPtrEqEmpty $ + dropWhileEnd (const True) "xyz" + , testCase "dropAround _ empty = empty" $ assertPtrEqEmpty $ + dropAround (const True) empty + , testCase "stripStart empty = empty" $ assertPtrEqEmpty $ T.stripStart empty + , testCase "stripEnd empty = empty" $ assertPtrEqEmpty $ T.stripEnd empty + , testCase "strip empty = empty" $ assertPtrEqEmpty $ T.strip empty + , testCase "fst (splitAt 0 _) = empty" $ assertPtrEqEmpty $ fst $ T.splitAt 0 "123" + , testCase "snd (splitAt n x) = empty where n > len x" $ assertPtrEqEmpty $ + snd $ T.splitAt 5 "123" + , testCase "fst (span (const False) _) = empty" $ assertPtrEqEmpty $ + fst $ T.span (const False) "123" + , testCase "snd (span (const True) _) = empty" $ assertPtrEqEmpty $ + snd $ T.span (const True) "123" + , testCase "fst (break (const False) _) = empty" $ assertPtrEqEmpty $ + fst $ T.span (const False) "123" + , testCase "snd (break (const True) _) = empty" $ assertPtrEqEmpty $ + snd $ T.span (const True) "123" + , testCase "fst (spanM (const $ pure False) _) = empty" $ + assertPtrEqEmpty . fst =<< T.spanM (const $ pure False) "123" + , testCase "snd (spanM (const $ pure True) _) = empty" $ + assertPtrEqEmpty . snd =<< T.spanM (const $ pure True) "123" + , testCase "fst (spanEndM (const $ pure True) _) = empty" $ + assertPtrEqEmpty . fst =<< T.spanEndM (const $ pure True) "123" + , testCase "snd (spanEndM (const $ pure False) _) = empty" $ + assertPtrEqEmpty . snd =<< T.spanEndM (const $ pure False) "123" + , testCase "groupBy _ empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.groupBy (==) empty + , testCase "inits empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.inits empty + , testCase "inits _ = [empty, ...]" $ assertPtrEqEmpty $ L.head $ T.inits "123" + , testCase "tails empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.tails empty + , testCase "tails _ = [..., empty]" $ assertPtrEqEmpty $ L.last $ T.tails "123" + , testCase "tails _ = [..., empty]" $ assertPtrEqEmpty $ L.last $ T.tails "123" + , testCase "split _ empty = [empty]" $ mapM_ assertPtrEqEmpty $ T.split (== 'a') "" + , testCase "filter (const False) _ = empty" $ assertPtrEqEmpty $ T.filter (const False) "1234" + , testCase "zipWith const empty empty = empty" $ assertPtrEqEmpty $ T.zipWith const "" "" + , testCase "unlines [] = empty" $ assertPtrEqEmpty $ T.unlines [] + , testCase "unwords [] = empty" $ assertPtrEqEmpty $ T.unwords [] + , testCase "stripPrefix empty empty = Just empty" $ mapM_ assertPtrEqEmpty $ + T.stripPrefix empty empty + , testCase "stripSuffix empty empty = Just empty" $ mapM_ assertPtrEqEmpty $ + T.stripSuffix empty empty + , testCase "commonPrefixes \"xyz\" \"123\" = Just (_, empty, _)" $ + mapM_ (assertPtrEqEmpty . (\(_, x, _) -> x)) $ T.commonPrefixes "xyz" "123" + , testCase "commonPrefixes \"xyz\" \"xyz\" = Just (_, _, empty)" $ + mapM_ (assertPtrEqEmpty . (\(_, _, x) -> x)) $ T.commonPrefixes "xyz" "xyz" + , testCase "copy empty = empty" $ assertPtrEqEmpty $ T.copy "" + ] diff --git a/text.cabal b/text.cabal index bad26bb4..b73c498d 100644 --- a/text.cabal +++ b/text.cabal @@ -207,6 +207,7 @@ library other-modules: Data.Text.Show Data.Text.Internal.Reverse + Data.Text.Internal.Transformation build-depends: array >= 0.3 && < 0.6, @@ -277,6 +278,7 @@ test-suite tests Tests.RebindableSyntaxTest Tests.Regressions Tests.SlowFunctions + Tests.ShareEmpty Tests.Utils build-depends: