haskell · Bodigrim · Jan 11, 2021 · Jan 11, 2021 · Dec 19, 2020
diff --git a/Data/ByteString.hs b/Data/ByteString.hs
@@ -2023,21 +2023,6 @@ appendFile = modifyFile AppendMode
 -- ---------------------------------------------------------------------
 -- Internal utilities
 
--- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
--- of the string if no element is found, rather than Nothing.
-findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
-findIndexOrEnd k (BS x l) =
-    accursedUnutterablePerformIO $ withForeignPtr x g
-  where
-    g ptr = go 0
-      where
-        go !n | n >= l    = return l
-              | otherwise = do w <- peek $ ptr `plusPtr` n
-                               if k w
-                                 then return n
-                                 else go (n+1)
-{-# INLINE findIndexOrEnd #-}
-
 -- Common up near identical calls to `error' to reduce the number
 -- constant strings created when compiled:
 errorEmptyList :: String -> a

diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs
@@ -38,6 +38,9 @@ module Data.ByteString.Internal (
 #endif
         ), -- instances: Eq, Ord, Show, Read, Data, Typeable
 
+        -- * Internal indexing
+        findIndexOrEnd,
+
         -- * Conversion with lists: packing and unpacking
         packBytes, packUptoLenBytes, unsafePackLenBytes,
         packChars, packUptoLenChars, unsafePackLenChars,
@@ -288,6 +291,24 @@ instance Data ByteString where
   gunfold _ _    = error "Data.ByteString.ByteString.gunfold"
   dataTypeOf _   = mkNoRepType "Data.ByteString.ByteString"
 
+------------------------------------------------------------------------
+-- Internal indexing
+
+-- | 'findIndexOrEnd' is a variant of @findIndex@ that returns the length
+-- of the string if no element satisfies the predicate, instead of 'Nothing'.
+findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
+findIndexOrEnd k (BS x l) =
+    accursedUnutterablePerformIO $ withForeignPtr x g
+  where
+    g ptr = go 0  -- start the scan at offset 0 from the buffer pointer
+      where
+        go !n | n >= l    = return l  -- scanned all l bytes without a match
+              | otherwise = do w <- peek $ ptr `plusPtr` n  -- byte at offset n
+                               if k w
+                                 then return n  -- first offset where k holds
+                                 else go (n+1)
+{-# INLINE findIndexOrEnd #-}
+
 ------------------------------------------------------------------------
 -- Packing and unpacking from lists
 

diff --git a/Data/ByteString/Lazy.hs b/Data/ByteString/Lazy.hs
@@ -721,7 +721,7 @@ takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 takeWhile f = takeWhile'
   where takeWhile' Empty        = Empty
         takeWhile' (Chunk c cs) =
-          case findIndexOrEnd (not . f) c of
+          case S.findIndexOrEnd (not . f) c of
             0                  -> Empty
             n | n < S.length c -> Chunk (S.take n c) Empty
               | otherwise      -> Chunk c (takeWhile' cs)
@@ -733,7 +733,7 @@ dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
 dropWhile f = dropWhile'
   where dropWhile' Empty        = Empty
         dropWhile' (Chunk c cs) =
-          case findIndexOrEnd (not . f) c of
+          case S.findIndexOrEnd (not . f) c of
             n | n < S.length c -> Chunk (S.drop n c) cs
               | otherwise      -> dropWhile' cs
 
@@ -747,7 +747,7 @@ break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
 break f = break'
   where break' Empty        = (Empty, Empty)
         break' (Chunk c cs) =
-          case findIndexOrEnd f c of
+          case S.findIndexOrEnd f c of
             0                  -> (Empty, Chunk c cs)
             n | n < S.length c -> (Chunk (S.take n c) Empty
                                   ,Chunk (S.drop n c) cs)
@@ -867,7 +867,7 @@ group = go
 
     to acc !_ Empty        = [revNonEmptyChunks acc]
     to acc !w (Chunk c cs) =
-      case findIndexOrEnd (/= w) c of
+      case S.findIndexOrEnd (/= w) c of
         0                    -> revNonEmptyChunks acc
                               : go (Chunk c cs)
         n | n == S.length c  -> to (S.unsafeTake n c : acc) w cs
@@ -886,7 +886,7 @@ groupBy k = go
 
     to acc !_ Empty        = [revNonEmptyChunks acc]
     to acc !w (Chunk c cs) =
-      case findIndexOrEnd (not . k w) c of
+      case S.findIndexOrEnd (not . k w) c of
         0                    -> revNonEmptyChunks acc
                               : go (Chunk c cs)
         n | n == S.length c  -> to (S.unsafeTake n c : acc) w cs
@@ -1408,20 +1408,6 @@ revNonEmptyChunks = L.foldl' (flip Chunk) Empty
 revChunks :: [P.ByteString] -> ByteString
 revChunks = L.foldl' (flip chunk) Empty
 
--- | 'findIndexOrEnd' is a variant of findIndex, that returns the length
--- of the string if no element is found, rather than Nothing.
-findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
-findIndexOrEnd k (S.BS x l) =
-    S.accursedUnutterablePerformIO $
-      withForeignPtr x $ \f -> go f 0
-  where
-    go !ptr !n | n >= l    = return l
-               | otherwise = do w <- peek ptr
-                                if k w
-                                  then return n
-                                  else go (ptr `plusPtr` 1) (n+1)
-{-# INLINE findIndexOrEnd #-}
-
 -- $IOChunk
 --
 -- ⚠ Using lazy I\/O functions like 'readFile' or 'hGetContents'

diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs
@@ -226,6 +226,12 @@ sortInputs = map (`S.take` S.pack [122, 121 .. 32]) [10..25]
 foldInputs :: [S.ByteString]
 foldInputs = map (\k -> S.pack $ if k <= 6 then take (2 ^ k) [32..95] else concat (replicate (2 ^ (k - 6)) [32..95])) [0..16]
 
+zeroes :: L.ByteString
+zeroes = L.replicate 10000 0  -- benchmark input: 10000 bytes, every one 0
+
+zeroOneRepeating :: L.ByteString
+zeroOneRepeating = L.take 10000 (L.cycle (L.pack [0,1]))  -- 0,1,0,1,... cut to 10000 bytes
+
 main :: IO ()
 main = do
   mapM_ putStrLn sanityCheckInfo
@@ -409,4 +415,13 @@ main = do
       , bgroup "filter" $ map (\s -> bench (show $ S.length s) $
           nf (S.filter odd) s) foldInputs
       ]
+    , bgroup "findIndexOrEnd"
+      [ bench "takeWhile"      $ nf (L.takeWhile even) zeroes
+      , bench "dropWhile"      $ nf (L.dropWhile even) zeroes
+      , bench "break"          $ nf (L.break odd) zeroes
+      , bench "group zeroes"   $ nf L.group zeroes
+      , bench "group zero-one" $ nf L.group zeroOneRepeating
+      , bench "groupBy (>=)"   $ nf (L.groupBy (>=)) zeroes
+      , bench "groupBy (>)"    $ nf (L.groupBy (>)) zeroes
+      ]
     ]