diff --git a/Data/Text/Lazy.hs b/Data/Text/Lazy.hs
index e00df838..15d0df23 100644
--- a/Data/Text/Lazy.hs
+++ b/Data/Text/Lazy.hs
@@ -716,6 +716,21 @@ drop i t0
     unstream (S.drop n (stream t)) = drop n t
   #-}
 
+-- | /O(n)/ 'dropWords' @n@ returns the suffix with @n@ 'Word16'
+-- values dropped, or the empty 'Text' if @n@ is greater than the
+-- number of 'Word16' values present.
+dropWords :: Int64 -> Text -> Text
+dropWords i t0
+    | i <= 0    = t0
+    | otherwise = drop' i t0
+  where drop' 0 ts    = ts
+        drop' _ Empty = Empty
+        drop' n (Chunk (T.Text arr off len) ts)
+            | n < len'  = chunk (textP arr (off+n') (len-n')) ts
+            | otherwise = drop' (n - len') ts
+            where len' = fromIntegral len
+                  n'   = fromIntegral n
+
 -- | /O(n)/ 'takeWhile', applied to a predicate @p@ and a 'Text', returns
 -- the longest prefix (possibly empty) of elements that satisfy @p@.
 -- This function is subject to array fusion.
@@ -945,8 +960,9 @@ split pat src
     | otherwise       = go 0 (indices pat src) src
   where
     go _ [] cs = [cs]
-    go !s (x:xs) cs = let h :*: t = splitAtWord (x-s) cs
-                      in  h : go (s+x) xs t
+    go !i (x:xs) cs = let h :*: t = splitAtWord (x-i) cs
+                      in  h : go (x+l) xs (dropWords l t)
+    l = foldlChunks (\a (T.Text _ _ b) -> a + fromIntegral b) 0 pat
 {-# INLINE [1] split #-}
 
 {-# RULES
diff --git a/tests/Properties.hs b/tests/Properties.hs
index a1fa12a5..34f48c8e 100644
--- a/tests/Properties.hs
+++ b/tests/Properties.hs
@@ -411,7 +411,11 @@ tl_tails = L.tails `eqP` (map unpackS . TL.tails)
 t_findSplit s = T.split s `eq` splitty
     where splitty t = case T.find s t of
                         (x,xs) -> x : L.map (T.drop (T.length s) . fst) xs
-t_split_split s = T.split s `eq` Slow.split s
+t_split_split s = unsquare ((T.split s `eq` Slow.split s) .
+                            T.intercalate s)
+tl_split_split s = unsquare (((TL.split (chunkify s) . chunkify) `eq`
+                              (map chunkify . T.split s)) .
+                             T.intercalate s)
 t_split_i (NotEmpty t) = id `eq` (T.intercalate t . T.split t)
 tl_split_i (NotEmpty t) = id `eq` (TL.intercalate t . TL.split t)
 
@@ -434,7 +438,9 @@ t_chunksOf_same_lengths k = all ((==k) . T.length) . ini . T.chunksOf k
 t_chunksOf_length k t = len == T.length t || (k <= 0 && len == 0)
     where len = L.sum . L.map T.length $ T.chunksOf k t
 
-tl_chunksOf k = T.chunksOf k `eq` (map (T.concat . TL.toChunks) . TL.chunksOf (fromIntegral k) . TL.fromChunks . (:[]))
+chunkify = TL.fromChunks . (:[])
+
+tl_chunksOf k = T.chunksOf k `eq` (map (T.concat . TL.toChunks) . TL.chunksOf (fromIntegral k) . chunkify)
 
 t_lines = L.lines `eqP` (map unpackS . T.lines)
 tl_lines = L.lines `eqP` (map unpackS . TL.lines)
@@ -793,6 +799,7 @@ tests =
   [ testGroup "breaking many" [
      testProperty "t_findSplit" t_findSplit,
      testProperty "t_split_split" t_split_split,
+     testProperty "tl_split_split" tl_split_split,
      testProperty "t_split_i" t_split_i,
      testProperty "tl_split_i" tl_split_i,
      testProperty "t_splitBy" t_splitBy,
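
For reference, below is a small standalone sketch (not part of the patch) of the behaviour the new tl_split_split property pins down: after each match, the lazy split must advance past the pattern's full length in Word16 units (via dropWords), so its output agrees with the strict T.split even when the pattern contains characters stored as surrogate pairs. The sketch assumes the API of the library version targeted by this patch, where split takes a Text pattern (later releases renamed it splitOn); chunkify mirrors the helper added to tests/Properties.hs.

-- Illustrative sketch only; assumes split still takes a Text pattern,
-- as in the version of the library this patch applies to.
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL

main :: IO ()
main = do
  let pat      = T.pack "\x1D11E,"          -- non-BMP char: two Word16 code units
      src      = T.pack "a\x1D11E,b\x1D11E,c"
      chunkify = TL.fromChunks . (:[])      -- same helper as in Properties.hs
      strict   = T.split pat src
      lazy     = TL.split (chunkify pat) (chunkify src)
  print strict                              -- ["a","b","c"]
  print (map (T.concat . TL.toChunks) lazy) -- should agree with the strict result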