haskell · Bodigrim · Aug 23, 2020 · Jul 13, 2019 · Aug 20, 2020
diff --git a/Data/ByteString.hs b/Data/ByteString.hs
@@ -138,8 +138,6 @@ module Data.ByteString (
 
         -- ** Search for arbitrary substrings
         breakSubstring,         -- :: ByteString -> ByteString -> (ByteString,ByteString)
-        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
-        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 
         -- * Searching ByteStrings
 
@@ -1337,10 +1335,9 @@ stripSuffix bs1@(PS _ _ l1) bs2@(PS _ _ l2)
    | bs1 `isSuffixOf` bs2 = Just (unsafeTake (l2 - l1) bs2)
    | otherwise = Nothing
 
--- | Check whether one string is a substring of another. @isInfixOf
--- p s@ is equivalent to @not (null (findSubstrings p s))@.
+-- | Check whether the first string is contained in the second (an empty first string always is).
 isInfixOf :: ByteString -> ByteString -> Bool
-isInfixOf p s = isJust (findSubstring p s)
+isInfixOf p s = null p || not (null $ snd $ breakSubstring p s)
 
 -- | Break a string on a substring, returning a pair of the part of the
 -- string prior to the match, and the rest of the string.
@@ -1349,14 +1346,6 @@ isInfixOf p s = isJust (findSubstring p s)
 --
 -- > break (== c) l == breakSubstring (singleton c) l
 --
--- and:
---
--- > findSubstring s l ==
--- >    if null s then Just 0
--- >              else case breakSubstring s l of
--- >                       (x,y) | null y    -> Nothing
--- >                             | otherwise -> Just (length x)
---
 -- For example, to tokenise a string, dropping delimiters:
 --
 -- > tokenise x y = h : if null t then [] else tokenise x (drop (length x) t)
@@ -1426,51 +1415,6 @@ breakSubstring pat =
             w' = mask .&. ((w `shiftL` 8) .|. b)
     {-# INLINE shift #-}
 
--- | Get the first index of a substring in another string,
---   or 'Nothing' if the string is not found.
---   @findSubstring p s@ is equivalent to @listToMaybe (findSubstrings p s)@.
-findSubstring :: ByteString -- ^ String to search for.
-              -> ByteString -- ^ String to seach in.
-              -> Maybe Int
-findSubstring pat src
-    | null pat && null src = Just 0
-    | null b = Nothing
-    | otherwise = Just (length a)
-  where (a, b) = breakSubstring pat src
-
-{-# DEPRECATED findSubstring "findSubstring is deprecated in favour of breakSubstring." #-}
-
--- | Find the indices of all non-overlapping occurences of a substring in a
--- string.
---
--- Note, prior to @0.10.6.0@ this function returned the indices of all
--- possibly-overlapping matches.
-findSubstrings :: ByteString -- ^ String to search for.
-               -> ByteString -- ^ String to seach in.
-               -> [Int]
-findSubstrings pat src
-    | null pat        = [0 .. ls]
-    | otherwise       = search 0
-  where
-    lp = length pat
-    ls = length src
-    search !n
-        | (n > ls - lp) || null b = []
-        | otherwise = let k = n + length a
-                      in  k : search (k + lp)
-      where
-        (a, b) = breakSubstring pat (unsafeDrop n src)
-
--- In
--- [0.10.6.0](<https://github.com/haskell/bytestring/commit/2160e091e215fecc9177d55a37cd50fc253ba86a?w=1>)
--- 'findSubstrings' was refactored to call an improved 'breakString'
--- implementation, but the refactored code no longer matches overlapping
--- strings.  The behaviour change appears to be inadvertent, but the function
--- had already been deprecated for more than seven years.  At this time
--- (@0.10.10.1@), the deprecation was twelve years in the past.
---
-{-# DEPRECATED findSubstrings "findSubstrings is deprecated in favour of breakSubstring." #-}
-
 -- ---------------------------------------------------------------------
 -- Zipping
 

diff --git a/Data/ByteString/Char8.hs b/Data/ByteString/Char8.hs
@@ -148,8 +148,6 @@ module Data.ByteString.Char8 (
 
         -- ** Search for arbitrary substrings
         breakSubstring,         -- :: ByteString -> ByteString -> (ByteString,ByteString)
-        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
-        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 
         -- * Searching ByteStrings
 
@@ -249,7 +247,7 @@ import Data.ByteString (empty,null,length,tail,init,append
                        ,concat,take,drop,splitAt,intercalate
                        ,sort,isPrefixOf,isSuffixOf,isInfixOf
                        ,stripPrefix,stripSuffix
-                       ,findSubstring,findSubstrings,breakSubstring,copy,group
+                       ,breakSubstring,copy,group
 
                        ,getLine, getContents, putStr, interact
                        ,readFile, writeFile, appendFile

diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
@@ -151,8 +151,6 @@ module Data.ByteString.Lazy (
 
         -- ** Search for arbitrary substrings
 --        isSubstringOf,          -- :: ByteString -> ByteString -> Bool
---        findSubstring,          -- :: ByteString -> ByteString -> Maybe Int
---        findSubstrings,         -- :: ByteString -> ByteString -> [Int]
 
         -- * Searching ByteStrings
 

diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs
@@ -280,80 +280,6 @@ main = do
         ]
       ]
 
-    , bgroup "substrings"
-      [ bgroup "easy"
-        [ bench "easy1"    . nf (uncurry S.findSubstrings)
-                          $ easySubstrings 1 1000000
-        , bench "easy4"    . nf (uncurry S.findSubstrings)
-                          $ easySubstrings 4 1000000
-        , bench "easy16"   . nf (uncurry S.findSubstrings)
-                          $ easySubstrings 16 1000000
-        , bench "easy64"   . nf (uncurry S.findSubstrings)
-                          $ easySubstrings 64 1000000
-        , bench "easy128"  . nf (uncurry S.findSubstrings)
-                          $ easySubstrings 128 1000000
-        , bench "easy1024" . nf (uncurry S.findSubstrings)
-                          $ easySubstrings 1024 1000000
-        ]
-      , bgroup "random"
-        [ bench "random1"    . nf (uncurry S.findSubstrings)
-                          $ randomSubstrings 1 1000000
-        , bench "random4"    . nf (uncurry S.findSubstrings)
-                          $ randomSubstrings 4 1000000
-        , bench "random16"   . nf (uncurry S.findSubstrings)
-                          $ randomSubstrings 16 1000000
-        , bench "random64"   . nf (uncurry S.findSubstrings)
-                          $ randomSubstrings 64 1000000
-        , bench "random128"  . nf (uncurry S.findSubstrings)
-                          $ randomSubstrings 128 1000000
-        , bench "random1024" . nf (uncurry S.findSubstrings)
-                          $ randomSubstrings 1024 1000000
-
-        ]
-      , bgroup "hard"
-        [ bench "hard1"    . nf (uncurry S.findSubstrings)
-                          $ hardSubstrings 1 1000000
-        , bench "hard4"    . nf (uncurry S.findSubstrings)
-                          $ hardSubstrings 4 1000000
-        , bench "hard16"   . nf (uncurry S.findSubstrings)
-                          $ hardSubstrings 16 1000000
-        , bench "hard64"   . nf (uncurry S.findSubstrings)
-                          $ hardSubstrings 64 1000000
-        , bench "hard128"  . nf (uncurry S.findSubstrings)
-                          $ hardSubstrings 128 1000000
-        , bench "hard1024" . nf (uncurry S.findSubstrings)
-                          $ hardSubstrings 1024 1000000
-        ]
-      , bgroup "pathological"
-        [ bench "pathological1"    . nf (uncurry S.findSubstrings)
-                          $ pathologicalSubstrings 1 1000000
-        , bench "pathological4"    . nf (uncurry S.findSubstrings)
-                          $ pathologicalSubstrings 4 1000000
-        , bench "pathological16"   . nf (uncurry S.findSubstrings)
-                          $ pathologicalSubstrings 16 1000000
-        , bench "pathological64"   . nf (uncurry S.findSubstrings)
-                          $ pathologicalSubstrings 64 1000000
-        , bench "pathological128"  . nf (uncurry S.findSubstrings)
-                          $ pathologicalSubstrings 128 1000000
-        , bench "pathological1024" . nf (uncurry S.findSubstrings)
-                          $ pathologicalSubstrings 1024 1000000
-        ]
-      , bgroup "html"
-        [ bench "html1"    . nfIO . fmap (uncurry S.findSubstrings)
-                          $ htmlSubstrings wikiPage 1 1000000
-        , bench "html4"    . nfIO . fmap (uncurry S.findSubstrings)
-                          $ htmlSubstrings wikiPage 4 1000000
-        , bench "html16"   . nfIO . fmap (uncurry S.findSubstrings)
-                          $ htmlSubstrings wikiPage 16 1000000
-        , bench "html64"   . nfIO . fmap (uncurry S.findSubstrings)
-                          $ htmlSubstrings wikiPage 64 1000000
-        , bench "html128"  . nfIO . fmap (uncurry S.findSubstrings)
-                          $ htmlSubstrings wikiPage 128 1000000
-        , bench "html1024" . nfIO . fmap (uncurry S.findSubstrings)
-                          $ htmlSubstrings wikiPage 1024 1000000
-        ]
-      ]
-
     , bgroup "Data.ByteString.Builder.Prim"
       [ benchFE "char7"      $ toEnum       >$< P.char7
       , benchFE "char8"      $ toEnum       >$< P.char8

diff --git a/tests/Properties.hs b/tests/Properties.hs
@@ -1252,48 +1252,6 @@ prop_initsBB xs = inits xs == map P.unpack (P.inits (P.pack xs))
 
 prop_tailsBB xs = tails xs == map P.unpack (P.tails (P.pack xs))
 
--- The correspondence between the test 'ByteString' and naive test 'String'
--- must be injective, otherwise the ByteString may find matches at positions
--- that don't match in the "corresponding" string.  To that end, we start
--- with and pack a Word8 array, rather than a unicode String.
---
-prop_findSubstringsBB :: [Word8] -> Int -> Int -> Bool
-prop_findSubstringsBB ws x l
-    = let bstr = P.pack ws
-          -- we look for some random substring of the test string
-          slice = C.take l $ C.drop x bstr
-          str = C.unpack bstr
-          substr = C.unpack slice
-      in C.findSubstrings slice bstr == naive_findSubstrings substr str
-  where
-    -- naive reference implementation
-    -- Note, overlapping matches have been broken since 2015, so at this
-    -- point just test for the current behaviour.
-    naive_findSubstrings :: String -> String -> [Int]
-    naive_findSubstrings p q
-        | null p    = [0..length q]
-        | otherwise = go 0 (length p) p (length q) q
-    go n !lp p !lq q =
-        if (lp > lq)
-        then []
-        else if p `isPrefixOf` q
-        then n : go (n + lp) lp p (lq - lp) (drop lp q)
-        else go (n + 1) lp p (lq - 1) (tail q)
-
--- See above re injective string -> bytestring correspondence.
-prop_findSubstringBB :: [Word8] -> Int -> Int -> Bool
-prop_findSubstringBB ws x l
-    = let bstr = P.pack ws
-          -- we look for some random substring of the test string
-          slice = C.take l $ C.drop x bstr
-          str = C.unpack bstr
-          substr = C.unpack slice
-      in C.findSubstring slice bstr == naive_findSubstring substr str
-  where
-    -- naive reference implementation
-    naive_findSubstring :: String -> String -> Maybe Int
-    naive_findSubstring p q = listToMaybe [x | x <- [0..length q], p `isPrefixOf` drop x q]
-
 -- correspondance between break and breakSubstring
 prop_breakSubstringBB c l
     = P.break (== c) l == P.breakSubstring (P.singleton c) l
@@ -1304,12 +1262,6 @@ prop_breakSubstring_isInfixOf s l
                                             (x,y) | P.null y  -> False
                                                   | otherwise -> True
 
-prop_breakSubstring_findSubstring s l
-    = P.findSubstring s l == if P.null s then Just 0
-                                       else case P.breakSubstring s l of
-                                            (x,y) | P.null y  -> Nothing
-                                                  | otherwise -> Just (P.length x)
-
 prop_replicate1BB c = forAll arbitrarySizedIntegral $ \n ->
                       P.unpack (P.replicate n c) == replicate n c
 prop_replicate2BB c = forAll arbitrarySizedIntegral $ \n ->
@@ -2277,10 +2229,7 @@ bb_tests =
     , testProperty "copy"           prop_copyLL
     , testProperty "inits"          prop_initsBB
     , testProperty "tails"          prop_tailsBB
-    , testProperty "findSubstrings "prop_findSubstringsBB
-    , testProperty "findSubstring "prop_findSubstringBB
     , testProperty "breakSubstring 1"prop_breakSubstringBB
-    , testProperty "breakSubstring 2"prop_breakSubstring_findSubstring
     , testProperty "breakSubstring 3"prop_breakSubstring_isInfixOf
 
     , testProperty "replicate1"     prop_replicate1BB