Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Deduplicate findIndexOrEnd by exporting it from Data.ByteString.Internal #337

Merged
merged 2 commits into from
Jan 11, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 0 additions & 15 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2023,21 +2023,6 @@ appendFile = modifyFile AppendMode
-- ---------------------------------------------------------------------
-- Internal utilities

-- | Like 'findIndex', but with a plain 'Int' result: yields the index of
-- the first byte satisfying the predicate, or the length of the string
-- when no byte does (instead of 'Nothing').
findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
findIndexOrEnd k (BS x l) =
    accursedUnutterablePerformIO $ withForeignPtr x $ \base ->
      -- Visit offsets 0, 1, ... from the start of the buffer and stop at
      -- the first byte accepted by @k@, or once the offset reaches @l@.
      let scan !i
            | i >= l    = return l
            | otherwise = do
                byte <- peek (base `plusPtr` i)
                if k byte
                  then return i
                  else scan (i + 1)
      in scan 0
{-# INLINE findIndexOrEnd #-}

-- Common up near-identical calls to 'error' to reduce the number of
-- constant strings created when compiled:
errorEmptyList :: String -> a
Expand Down
21 changes: 21 additions & 0 deletions Data/ByteString/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ module Data.ByteString.Internal (
#endif
), -- instances: Eq, Ord, Show, Read, Data, Typeable

-- * Internal indexing
findIndexOrEnd,

-- * Conversion with lists: packing and unpacking
packBytes, packUptoLenBytes, unsafePackLenBytes,
packChars, packUptoLenChars, unsafePackLenChars,
Expand Down Expand Up @@ -288,6 +291,24 @@ instance Data ByteString where
gunfold _ _ = error "Data.ByteString.ByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.ByteString"

------------------------------------------------------------------------
-- Internal indexing

-- | Like 'findIndex', but with a plain 'Int' result: yields the index of
-- the first byte satisfying the predicate, or the length of the string
-- when no byte does (instead of 'Nothing').
findIndexOrEnd :: (Word8 -> Bool) -> ByteString -> Int
findIndexOrEnd k (BS x l) =
    accursedUnutterablePerformIO $ withForeignPtr x $ \base ->
      -- Visit offsets 0, 1, ... from the start of the buffer and stop at
      -- the first byte accepted by @k@, or once the offset reaches @l@.
      let scan !i
            | i >= l    = return l
            | otherwise = do
                byte <- peek (base `plusPtr` i)
                if k byte
                  then return i
                  else scan (i + 1)
      in scan 0
{-# INLINE findIndexOrEnd #-}

------------------------------------------------------------------------
-- Packing and unpacking from lists

Expand Down
24 changes: 5 additions & 19 deletions Data/ByteString/Lazy.hs
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,7 @@ takeWhile :: (Word8 -> Bool) -> ByteString -> ByteString
takeWhile f = takeWhile'
where takeWhile' Empty = Empty
takeWhile' (Chunk c cs) =
case findIndexOrEnd (not . f) c of
case S.findIndexOrEnd (not . f) c of
0 -> Empty
n | n < S.length c -> Chunk (S.take n c) Empty
| otherwise -> Chunk c (takeWhile' cs)
Expand All @@ -733,7 +733,7 @@ dropWhile :: (Word8 -> Bool) -> ByteString -> ByteString
dropWhile f = dropWhile'
where dropWhile' Empty = Empty
dropWhile' (Chunk c cs) =
case findIndexOrEnd (not . f) c of
case S.findIndexOrEnd (not . f) c of
n | n < S.length c -> Chunk (S.drop n c) cs
| otherwise -> dropWhile' cs

Expand All @@ -747,7 +747,7 @@ break :: (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
break f = break'
where break' Empty = (Empty, Empty)
break' (Chunk c cs) =
case findIndexOrEnd f c of
case S.findIndexOrEnd f c of
0 -> (Empty, Chunk c cs)
n | n < S.length c -> (Chunk (S.take n c) Empty
,Chunk (S.drop n c) cs)
Expand Down Expand Up @@ -867,7 +867,7 @@ group = go

to acc !_ Empty = [revNonEmptyChunks acc]
to acc !w (Chunk c cs) =
case findIndexOrEnd (/= w) c of
case S.findIndexOrEnd (/= w) c of
0 -> revNonEmptyChunks acc
: go (Chunk c cs)
n | n == S.length c -> to (S.unsafeTake n c : acc) w cs
Expand All @@ -886,7 +886,7 @@ groupBy k = go

to acc !_ Empty = [revNonEmptyChunks acc]
to acc !w (Chunk c cs) =
case findIndexOrEnd (not . k w) c of
case S.findIndexOrEnd (not . k w) c of
0 -> revNonEmptyChunks acc
: go (Chunk c cs)
n | n == S.length c -> to (S.unsafeTake n c : acc) w cs
Expand Down Expand Up @@ -1408,20 +1408,6 @@ revNonEmptyChunks = L.foldl' (flip Chunk) Empty
revChunks :: [P.ByteString] -> ByteString
revChunks = L.foldl' (flip chunk) Empty

-- | Like 'findIndex' on a strict chunk, but with a plain 'Int' result:
-- yields the index of the first byte satisfying the predicate, or the
-- length of the chunk when no byte does (instead of 'Nothing').
findIndexOrEnd :: (Word8 -> Bool) -> P.ByteString -> Int
findIndexOrEnd k (S.BS x l) =
    S.accursedUnutterablePerformIO (withForeignPtr x (\start -> step start 0))
  where
    -- @cursor@ points at the byte whose index is @i@; both advance together.
    step !cursor !i
      | i >= l    = return l
      | otherwise = peek cursor >>= \byte ->
          if k byte
            then return i
            else step (cursor `plusPtr` 1) (i + 1)
{-# INLINE findIndexOrEnd #-}

-- $IOChunk
--
-- ⚠ Using lazy I\/O functions like 'readFile' or 'hGetContents'
Expand Down
15 changes: 15 additions & 0 deletions bench/BenchAll.hs
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ sortInputs = map (`S.take` S.pack [122, 121 .. 32]) [10..25]
-- | Benchmark inputs of length @2 ^ k@ for @k@ from 0 to 16, filled with
-- the byte values 32..95 (a 64-value range): a prefix of the range while
-- it still fits (@k <= 6@), whole repetitions of it afterwards.
foldInputs :: [S.ByteString]
foldInputs = [S.pack (bytesOfSize k) | k <- [0..16]]
  where
    byteRange = [32..95]
    bytesOfSize k
      | k <= 6    = take (2 ^ k) byteRange
      | otherwise = concat (replicate (2 ^ (k - 6)) byteRange)

-- | A lazy 'L.ByteString' consisting of 10000 zero bytes.
--
-- Input for the @findIndexOrEnd@ benchmark group (@takeWhile@,
-- @dropWhile@, @break@, @group@ and @groupBy@ over a run of equal bytes).
zeroes :: L.ByteString
zeroes = L.replicate 10000 0

-- | A lazy 'L.ByteString' of 10000 bytes alternating 0, 1, 0, 1, ...
-- (the two-byte pattern @[0,1]@ cycled and cut off at 10000 bytes).
--
-- Input for the @group zero-one@ benchmark, where no two adjacent bytes
-- are equal.
zeroOneRepeating :: L.ByteString
zeroOneRepeating = L.take 10000 (L.cycle (L.pack [0,1]))

main :: IO ()
main = do
mapM_ putStrLn sanityCheckInfo
Expand Down Expand Up @@ -409,4 +415,13 @@ main = do
, bgroup "filter" $ map (\s -> bench (show $ S.length s) $
nf (S.filter odd) s) foldInputs
]
, bgroup "findIndexOrEnd"
[ bench "takeWhile" $ nf (L.takeWhile even) zeroes
, bench "dropWhile" $ nf (L.dropWhile even) zeroes
, bench "break" $ nf (L.break odd) zeroes
, bench "group zeroes" $ nf L.group zeroes
, bench "group zero-one" $ nf L.group zeroOneRepeating
, bench "groupBy (>=)" $ nf (L.groupBy (>=)) zeroes
, bench "groupBy (>)" $ nf (L.groupBy (>)) zeroes
]
]