Skip to content

Commit

Permalink
Rename split to bytestring-like name
Browse files Browse the repository at this point in the history
  • Loading branch information
bos committed Nov 8, 2010
1 parent 15eaf1f commit 6fa1d91
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 81 deletions.
40 changes: 20 additions & 20 deletions Data/Text.hs
Expand Up @@ -128,8 +128,8 @@ module Data.Text

-- ** Breaking into many substrings
-- $split
, splitOn
, split
, splitBy
, chunksOf

-- ** Breaking into lines and words
Expand Down Expand Up @@ -554,7 +554,7 @@ replace :: Text -- ^ Text to search for
-> Text -- ^ Replacement text
-> Text -- ^ Input text
-> Text
replace s d = intercalate d . split s
replace s d = intercalate d . splitOn s
{-# INLINE replace #-}

-- ----------------------------------------------------------------------------
Expand Down Expand Up @@ -1075,46 +1075,46 @@ tails t | null t = [empty]
--
-- Examples:
--
-- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
-- > split "aaa" "aaaXaaaXaaaXaaa" == ["","X","X","X",""]
-- > split "x" "x" == ["",""]
-- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
-- > splitOn "aaa" "aaaXaaaXaaaXaaa" == ["","X","X","X",""]
-- > splitOn "x" "x" == ["",""]
--
-- and
--
-- > intercalate s . split s == id
-- > split (singleton c) == splitBy (==c)
-- > intercalate s . splitOn s == id
-- > splitOn (singleton c) == split (==c)
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
split :: Text -> Text -> [Text]
split pat@(Text _ _ l) src@(Text arr off len)
| l <= 0 = emptyError "split"
| isSingleton pat = splitBy (== unsafeHead pat) src
splitOn :: Text -> Text -> [Text]
splitOn pat@(Text _ _ l) src@(Text arr off len)
| l <= 0 = emptyError "splitOn"
| isSingleton pat = split (== unsafeHead pat) src
| otherwise = go 0 (indices pat src)
where
go !s (x:xs) = textP arr (s+off) (x-s) : go (x+l) xs
go s _ = [textP arr (s+off) (len-s)]
{-# INLINE [1] split #-}
{-# INLINE [1] splitOn #-}

{-# RULES
"TEXT split/singleton -> splitBy/==" [~1] forall c t.
split (singleton c) t = splitBy (==c) t
"TEXT splitOn/singleton -> split/==" [~1] forall c t.
splitOn (singleton c) t = split (==c) t
#-}

-- | /O(n)/ Splits a 'Text' into components delimited by separators,
-- where the predicate returns True for a separator element. The
-- resulting components do not contain the separators. Two adjacent
-- separators result in an empty component in the output. For example:
--
-- > splitBy (=='a') "aabbaca" == ["","","bb","c",""]
-- > splitBy (=='a') "" == [""]
splitBy :: (Char -> Bool) -> Text -> [Text]
splitBy _ t@(Text _off _arr 0) = [t]
splitBy p t = loop t
-- > split (=='a') "aabbaca" == ["","","bb","c",""]
-- > split (=='a') "" == [""]
split :: (Char -> Bool) -> Text -> [Text]
split _ t@(Text _off _arr 0) = [t]
split p t = loop t
where loop s | null s' = [l]
| otherwise = l : loop (unsafeTail s')
where (l, s') = break p s
{-# INLINE splitBy #-}
{-# INLINE split #-}

-- | /O(n)/ Splits a 'Text' into components of length @k@. The last
-- element may be shorter than the other chunks, depending on the
Expand Down
50 changes: 25 additions & 25 deletions Data/Text/Lazy.hs
Expand Up @@ -133,8 +133,8 @@ module Data.Text.Lazy

-- ** Breaking into many substrings
-- $split
, splitOn
, split
, splitBy
, chunksOf
-- , breakSubstring

Expand Down Expand Up @@ -555,7 +555,7 @@ replace :: Text -- ^ Text to search for
-> Text -- ^ Replacement text
-> Text -- ^ Input text
-> Text
replace s d = intercalate d . split s
replace s d = intercalate d . splitOn s
{-# INLINE replace #-}

-- ----------------------------------------------------------------------------
Expand Down Expand Up @@ -1104,54 +1104,54 @@ tails ts@(Chunk t ts')
--
-- Examples:
--
-- > split "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
-- > split "aaa" "aaaXaaaXaaaXaaa" == ["","X","X","X",""]
-- > split "x" "x" == ["",""]
-- > splitOn "\r\n" "a\r\nb\r\nd\r\ne" == ["a","b","d","e"]
-- > splitOn "aaa" "aaaXaaaXaaaXaaa" == ["","X","X","X",""]
-- > splitOn "x" "x" == ["",""]
--
-- and
--
-- > intercalate s . split s == id
-- > split (singleton c) == splitBy (==c)
-- > intercalate s . splitOn s == id
-- > splitOn (singleton c) == split (==c)
--
-- This function is strict in its first argument, and lazy in its
-- second.
--
-- In (unlikely) bad cases, this function's time complexity degrades
-- towards /O(n*m)/.
split :: Text -- ^ Text to split on
-> Text -- ^ Input text
-> [Text]
split pat src
| null pat = emptyError "split"
| isSingleton pat = splitBy (== head pat) src
splitOn :: Text -- ^ Text to split on
-> Text -- ^ Input text
-> [Text]
splitOn pat src
| null pat = emptyError "splitOn"
| isSingleton pat = split (== head pat) src
| otherwise = go 0 (indices pat src) src
where
go _ [] cs = [cs]
go !i (x:xs) cs = let h :*: t = splitAtWord (x-i) cs
in h : go (x+l) xs (dropWords l t)
l = foldlChunks (\a (T.Text _ _ b) -> a + fromIntegral b) 0 pat
{-# INLINE [1] split #-}
{-# INLINE [1] splitOn #-}

{-# RULES
"LAZY TEXT split/singleton -> splitBy/==" [~1] forall c t.
split (singleton c) t = splitBy (==c) t
"LAZY TEXT splitOn/singleton -> split/==" [~1] forall c t.
splitOn (singleton c) t = split (==c) t
#-}

-- | /O(n)/ Splits a 'Text' into components delimited by separators,
-- where the predicate returns True for a separator element. The
-- resulting components do not contain the separators. Two adjacent
-- separators result in an empty component in the output. For example:
--
-- > splitBy (=='a') "aabbaca" == ["","","bb","c",""]
-- > splitBy (=='a') [] == [""]
splitBy :: (Char -> Bool) -> Text -> [Text]
splitBy _ Empty = [Empty]
splitBy p (Chunk t0 ts0) = comb [] (T.splitBy p t0) ts0
-- > split (=='a') "aabbaca" == ["","","bb","c",""]
-- > split (=='a') "" == [""]
split :: (Char -> Bool) -> Text -> [Text]
split _ Empty = [Empty]
split p (Chunk t0 ts0) = comb [] (T.split p t0) ts0
where comb acc (s:[]) Empty = revChunks (s:acc) : []
comb acc (s:[]) (Chunk t ts) = comb (s:acc) (T.splitBy p t) ts
comb acc (s:[]) (Chunk t ts) = comb (s:acc) (T.split p t) ts
comb acc (s:ss) ts = revChunks (s:acc) : comb [] ss ts
comb _ [] _ = impossibleError "splitBy"
{-# INLINE splitBy #-}
comb _ [] _ = impossibleError "split"
{-# INLINE split #-}

-- | /O(n)/ Splits a 'Text' into components of length @k@. The last
-- element may be shorter than the other chunks, depending on the
Expand All @@ -1178,7 +1178,7 @@ lines t = let (l,t') = break ((==) '\n') t
-- | /O(n)/ Breaks a 'Text' up into a list of words, delimited by 'Char's
-- representing white space.
words :: Text -> [Text]
words = L.filter (not . null) . splitBy isSpace
words = L.filter (not . null) . split isSpace
{-# INLINE words #-}

-- | /O(n)/ Joins lines, after appending a terminating newline to
Expand Down
58 changes: 29 additions & 29 deletions tests/Properties.hs
Expand Up @@ -265,13 +265,13 @@ t_reverse = L.reverse `eqP` (unpackS . T.reverse)
tl_reverse = L.reverse `eqP` (unpackS . TL.reverse)
t_reverse_short n = L.reverse `eqP` (unpackS . S.reverse . shorten n . S.stream)

t_replace s d = (L.intercalate d . split s) `eqP`
t_replace s d = (L.intercalate d . splitOn s) `eqP`
(unpackS . T.replace (T.pack s) (T.pack d))
tl_replace s d = (L.intercalate d . split s) `eqP`
tl_replace s d = (L.intercalate d . splitOn s) `eqP`
(unpackS . TL.replace (TL.pack s) (TL.pack d))

split :: (Eq a) => [a] -> [a] -> [[a]]
split pat src0
splitOn :: (Eq a) => [a] -> [a] -> [[a]]
splitOn pat src0
| l == 0 = error "empty"
| otherwise = go src0
where
Expand Down Expand Up @@ -523,21 +523,21 @@ sl_filterCount c = (L.genericLength . L.filter (==c)) `eqP` SL.countChar c
t_findCount s = (L.length . T.breakOnAll s) `eq` T.count s
tl_findCount s = (L.genericLength . TL.breakOnAll s) `eq` TL.count s

t_split_split s = (T.split s `eq` Slow.split s) . T.intercalate s
tl_split_split s = ((TL.split (TL.fromStrict s) . TL.fromStrict) `eq`
(map TL.fromStrict . T.split s)) . T.intercalate s
t_split_i (NotEmpty t) = id `eq` (T.intercalate t . T.split t)
tl_split_i (NotEmpty t) = id `eq` (TL.intercalate t . TL.split t)

t_splitBy p = splitBy p `eqP` (map unpackS . T.splitBy p)
t_splitBy_count c = (L.length . T.splitBy (==c)) `eq`
((1+) . T.count (T.singleton c))
t_splitBy_split c = T.splitBy (==c) `eq` T.split (T.singleton c)
tl_splitBy p = splitBy p `eqP` (map unpackS . TL.splitBy p)

splitBy :: (a -> Bool) -> [a] -> [[a]]
splitBy _ [] = [[]]
splitBy p xs = loop xs
t_splitOn_split s = (T.splitOn s `eq` Slow.splitOn s) . T.intercalate s
tl_splitOn_split s = ((TL.splitOn (TL.fromStrict s) . TL.fromStrict) `eq`
(map TL.fromStrict . T.splitOn s)) . T.intercalate s
t_splitOn_i (NotEmpty t) = id `eq` (T.intercalate t . T.splitOn t)
tl_splitOn_i (NotEmpty t) = id `eq` (TL.intercalate t . TL.splitOn t)

t_split p = split p `eqP` (map unpackS . T.split p)
t_split_count c = (L.length . T.split (==c)) `eq`
((1+) . T.count (T.singleton c))
t_split_splitOn c = T.split (==c) `eq` T.splitOn (T.singleton c)
tl_split p = split p `eqP` (map unpackS . TL.split p)

split :: (a -> Bool) -> [a] -> [[a]]
split _ [] = [[]]
split p xs = loop xs
where loop s | null s' = [l]
| otherwise = l : loop (tail s')
where (l, s') = break p s
Expand Down Expand Up @@ -613,8 +613,8 @@ tl_index s = forAll (choose (-l,l*2))
where l = L.length s

t_findIndex p = L.findIndex p `eqP` T.findIndex p
t_count (NotEmpty t) = (subtract 1 . L.length . T.split t) `eq` T.count t
tl_count (NotEmpty t) = (subtract 1 . L.genericLength . TL.split t) `eq`
t_count (NotEmpty t) = (subtract 1 . L.length . T.splitOn t) `eq` T.count t
tl_count (NotEmpty t) = (subtract 1 . L.genericLength . TL.splitOn t) `eq`
TL.count t
t_zip s = L.zip s `eqP` T.zip (packS s)
tl_zip s = L.zip s `eqP` TL.zip (packS s)
Expand Down Expand Up @@ -1080,14 +1080,14 @@ tests = [
testProperty "sl_filterCount" sl_filterCount,
testProperty "t_findCount" t_findCount,
testProperty "tl_findCount" tl_findCount,
testProperty "t_split_split" t_split_split,
testProperty "tl_split_split" tl_split_split,
testProperty "t_split_i" t_split_i,
testProperty "tl_split_i" tl_split_i,
testProperty "t_splitBy" t_splitBy,
testProperty "t_splitBy_count" t_splitBy_count,
testProperty "t_splitBy_split" t_splitBy_split,
testProperty "tl_splitBy" tl_splitBy,
testProperty "t_splitOn_split" t_splitOn_split,
testProperty "tl_splitOn_split" tl_splitOn_split,
testProperty "t_splitOn_i" t_splitOn_i,
testProperty "tl_splitOn_i" tl_splitOn_i,
testProperty "t_split" t_split,
testProperty "t_split_count" t_split_count,
testProperty "t_split_splitOn" t_split_splitOn,
testProperty "tl_split" tl_split,
testProperty "t_chunksOf_same_lengths" t_chunksOf_same_lengths,
testProperty "t_chunksOf_length" t_chunksOf_length,
testProperty "tl_chunksOf" tl_chunksOf
Expand Down
14 changes: 7 additions & 7 deletions tests/SlowFunctions.hs
Expand Up @@ -3,7 +3,7 @@
module SlowFunctions
(
indices
, split
, splitOn
) where

import qualified Data.Text as T
Expand All @@ -23,12 +23,12 @@ indices needle@(Text _narr _noff nlen) haystack@(Text harr hoff hlen)
where t = Text harr (hoff+i) (hlen-i)
d = iter_ haystack i

split :: T.Text -- ^ Text to split on
-> T.Text -- ^ Input text
-> [T.Text]
split pat src0
| T.null pat = error "split: empty"
| l == 1 = T.splitBy (== (unsafeHead pat)) src0
splitOn :: T.Text -- ^ Text to split on
-> T.Text -- ^ Input text
-> [T.Text]
splitOn pat src0
| T.null pat = error "splitOn: empty"
| l == 1 = T.split (== (unsafeHead pat)) src0
| otherwise = go src0
where
l = T.length pat
Expand Down

0 comments on commit 6fa1d91

Please sign in to comment.