Add lazy break, get rid of lazy splitTimes and splitTimesEnd, and add tests.

--HG--
extra : convert_revision : bf4feefea2f57e8777338a471eba1de81e5ac69b
bos committed Oct 7, 2009
1 parent 3a9f06b commit 77d1877
Showing 3 changed files with 53 additions and 55 deletions.
96 changes: 41 additions & 55 deletions Data/Text/Lazy.hs
@@ -118,6 +118,7 @@ module Data.Text.Lazy
, stripEnd
, splitAt
, spanBy
, break
, breakBy
, group
, groupBy
@@ -127,8 +128,6 @@
-- ** Breaking into many substrings
-- $split
, split
, splitTimes
, splitTimesEnd
, splitBy
, chunksOf
-- , breakSubstring
@@ -166,7 +165,7 @@ module Data.Text.Lazy

import Prelude (Char, Bool(..), Int, Maybe(..), String,
Eq(..), Ord(..), Read(..), Show(..),
(&&), (||), (+), (-), (.), ($), (++),
(&&), (+), (-), (.), ($), (++),
div, flip, fromIntegral, not, otherwise)
import qualified Prelude as P
import Data.Int (Int64)
@@ -175,11 +174,13 @@ import Data.Char (isSpace)
import Data.Monoid (Monoid(..))
import Data.String (IsString(..))
import qualified Data.Text as T
import qualified Data.Text.Internal as T
import qualified Data.Text.Fusion.Common as S
import qualified Data.Text.Unsafe as T
import qualified Data.Text.Lazy.Fusion as S
import Data.Text.Lazy.Fusion (stream, unstream)
import Data.Text.Lazy.Internal
import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldlChunks, foldrChunks)
import Data.Text.Internal (textP)
import Data.Text.Lazy.Search (indices)

instance Eq Text where
@@ -813,6 +814,42 @@ splitAt = loop
in (Chunk t ts', ts'')
where len = fromIntegral (T.length t)

-- | /O(n+m)/ Find the first instance of @needle@ (which must be
-- non-'null') in @haystack@. The first element of the returned tuple
-- is the prefix of @haystack@ before @needle@ is matched. The second
-- is the remainder of @haystack@, starting with the match.
--
-- Examples:
--
-- > break "::" "a::b::c" ==> ("a", "::b::c")
-- > break "/" "foobar" ==> ("foobar", "")
--
-- Laws:
--
-- > append prefix match == haystack
-- > where (prefix, match) = break needle haystack
--
-- If you need to break a string by a substring repeatedly (e.g. you
-- want to break on every instance of a substring), use 'find'
-- instead, as it has lower startup overhead.
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
break :: Text -> Text -> (Text, Text)
break pat src
| null pat = emptyError "break"
| otherwise = case indices pat src of
[] -> (src, empty)
(x:_) -> go x src
where
go _ Empty = (empty, empty)
go x (Chunk c@(T.Text arr off len) cs)
| y >= len = let (h,t) = go (x-fromIntegral len) cs
in (Chunk c h, t)
| otherwise = (chunk (textP arr off y) empty,
chunk (textP arr (off+y) (len-y)) cs)
where y = fromIntegral x

-- | /O(n)/ 'breakBy' is like 'spanBy', but the prefix returned is over
-- elements that fail the predicate @p@.
breakBy :: (Char -> Bool) -> Text -> (Text, Text)
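
An illustrative sketch, not part of this commit: exercising the new lazy 'break' (as the function is named here) from a small standalone program, assuming OverloadedStrings for the Text literals and a qualified import of Data.Text.Lazy as TL.

{-# LANGUAGE OverloadedStrings #-}
import qualified Data.Text.Lazy as TL

main :: IO ()
main = do
  -- The first match of "::" splits the haystack into prefix and remainder.
  let (prefix, match) = TL.break "::" "a::b::c"
  print (prefix, match)                        -- ("a","::b::c")
  -- The documented law: the two pieces reassemble into the haystack.
  print (TL.append prefix match == "a::b::c")
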
@@ -913,57 +950,6 @@ split pat src0
split (singleton c) t = splitBy (==c) t
#-}

-- | /O(m)*O(n)/ Break a 'Text' into pieces at most @k@ times,
-- treating the first 'Text' argument as the delimiter to break on,
-- and consuming the delimiter. The last element of the list contains
-- the remaining text after the number of times to split has been
-- reached. A value of zero or less for @k@ causes no splitting to
-- occur. An empty delimiter is invalid, and will cause an error to
-- be raised.
--
-- Examples:
--
-- > splitTimes 0 "//" "a//b//c" == ["a//b//c"]
-- > splitTimes 2 ":" "a:b:c:d:e" == ["a","b","c:d:e"]
-- > splitTimes 100 "???" "a????b" == ["a","?b"]
--
-- and
--
-- > intercalate s . splitTimes k s == id
splitTimes :: Int64 -- ^ Maximum number of times to split
-> Text -- ^ Text to split on
-> Text -- ^ Input text
-> [Text]
splitTimes k pat src0
| k <= 0 = [src0]
| l == 0 = emptyError "splitTimes"
| otherwise = go k src0
where
l = length pat
go !i src = search 0 src
where
search !n !s
| i == 0 || null s = [src] -- not found or limit reached
| pat `isPrefixOf` s = take n src : go (i-1) (drop l s)
| otherwise = search (n+1) (tail s)
{-# INLINE splitTimes #-}

-- | /O(m)*O(n)/ Break a 'Text' into pieces at most @k@ times, like
-- 'splitTimes', but start from the end of the input and work towards
-- the start.
--
-- Examples:
--
-- > splitTimes 2 "::" "a::b::c::d::e" == ["a","b","c::d::e"]
-- > splitTimesEnd 2 "::" "a::b::c::d::e" == ["a::b::c","d","e"]
splitTimesEnd :: Int64 -- ^ Maximum number of times to split
-> Text -- ^ Text to split on
-> Text -- ^ Input text
-> [Text]
splitTimesEnd k pat src =
L.reverse . L.map reverse $ splitTimes k (reverse pat) (reverse src)
{-# INLINE splitTimesEnd #-}

-- | /O(n)/ Splits a 'Text' into components delimited by separators,
-- where the predicate returns True for a separator element. The
-- resulting components do not contain the separators. Two adjacent
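Since this commit drops splitTimes and splitTimesEnd, here is a rough sketch of how a caller could recover bounded splitting on top of the new substring break. splitTimesVia is a hypothetical helper, not part of the library, and because it re-scans the remainder on every step it pays the startup overhead that the break documentation warns about for repeated breaking.

import Data.Int (Int64)
import qualified Data.Text.Lazy as TL

-- Hypothetical helper in the spirit of the removed 'splitTimes':
-- split at most k times on a non-empty pattern, consuming the pattern.
splitTimesVia :: Int64 -> TL.Text -> TL.Text -> [TL.Text]
splitTimesVia k pat src
  | k <= 0       = [src]
  | TL.null rest = [src]                       -- pattern not found
  | otherwise    = prefix : splitTimesVia (k - 1) pat rest'
  where
    (prefix, rest) = TL.break pat src
    rest'          = TL.drop (TL.length pat) rest

-- e.g. splitTimesVia 2 (TL.pack ":") (TL.pack "a:b:c:d:e")
--      should give ["a","b","c:d:e"]
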
2 changes: 2 additions & 0 deletions Data/Text/Lazy/Search.hs
@@ -13,6 +13,8 @@
-- Fast substring search for lazy 'Text', based on work by Boyer,
-- Moore, Horspool, Sunday, and Lundh. Adapted from the strict
-- implementation.
--
-- /Note/: this is currently too strict!

module Data.Text.Lazy.Search
(
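A minimal sketch of calling the search primitive directly, assuming the shape that 'break' above relies on: indices takes the needle first and the haystack second, and returns the offset of every match, taken here to be Int64 values. Per the note added in this commit, the lazy search is currently stricter than it needs to be, so it may force more of a lazy haystack than the first match alone requires.

import Data.Int (Int64)
import qualified Data.Text.Lazy as TL
import Data.Text.Lazy.Search (indices)

-- All offsets at which "::" occurs in the haystack.
doubleColonOffsets :: TL.Text -> [Int64]
doubleColonOffsets = indices (TL.pack "::")

-- e.g. doubleColonOffsets (TL.pack "a::b::c") should be [1,4]
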
10 changes: 10 additions & 0 deletions tests/Properties.hs
@@ -390,6 +390,13 @@ tl_spanBy p = L.span p `eqP` (unpack2 . TL.spanBy p)
t_break_id s = squid `eq` (uncurry T.append . T.break s)
where squid t | T.null s = error "empty"
| otherwise = t
tl_break_id s = squid `eq` (uncurry TL.append . TL.break s)
where squid t | TL.null s = error "empty"
| otherwise = t
t_break_start (NotEmpty s) t = let (_,m) = T.break s t
in T.null m || s `T.isPrefixOf` m
tl_break_start (NotEmpty s) t = let (_,m) = TL.break s t
in TL.null m || s `TL.isPrefixOf` m
t_breakBy p = L.break p `eqP` (unpack2 . T.breakBy p)
tl_breakBy p = L.break p `eqP` (unpack2 . TL.breakBy p)
t_group = L.group `eqP` (map unpackS . T.group)
@@ -770,6 +777,9 @@ tests = [
testProperty "t_spanBy" t_spanBy,
testProperty "tl_spanBy" tl_spanBy,
testProperty "t_break_id" t_break_id,
testProperty "tl_break_id" tl_break_id,
testProperty "t_break_start" t_break_start,
testProperty "tl_break_start" tl_break_start,
testProperty "t_breakBy" t_breakBy,
testProperty "tl_breakBy" tl_breakBy,
testProperty "t_group" t_group,
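
For readers outside the repository's test harness (which drives these properties through its eq/eqP helpers and testProperty), a standalone QuickCheck sketch of the same two laws for the new lazy break might look like the following; the property names are illustrative, not the suite's own.

import qualified Data.Text.Lazy as TL
import Test.QuickCheck

-- Appending the two halves returned by 'break' recovers the haystack.
prop_break_id :: String -> String -> Property
prop_break_id needle hay =
  not (null needle) ==>
    let (prefix, match) = TL.break (TL.pack needle) (TL.pack hay)
    in TL.append prefix match == TL.pack hay

-- The second half is either empty or starts with the needle.
prop_break_start :: String -> String -> Property
prop_break_start needle hay =
  not (null needle) ==>
    let (_, match) = TL.break (TL.pack needle) (TL.pack hay)
    in TL.null match || TL.pack needle `TL.isPrefixOf` match

main :: IO ()
main = do
  quickCheck prop_break_id
  quickCheck prop_break_start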
