Skip to content

Commit

Permalink
Compute length at compile time for literal strings (#191)
Browse files Browse the repository at this point in the history
* Compute length at compile time for literal strings

Add unsafePackLiteral to Data.ByteString.Internal. With GHC-8.10+,
use known-key variant of C `strlen` from `GHC.CString` that supports
constant folding. Also in GHC 8.10, another data constructor of
ForeignPtrContents becomes available: LiteralPtr. For string literals,
this is now used. It saves space when there are lots of literals, and
it improves opportunities for case-of-known data constructor optimizations
when a function scrutinizes the length of a ByteString.

* Fix a few small things. Clean up warnings

* Fix aesthetic issues

* Update GHC version to 8.12

Co-authored-by: Simon Jakobi <simon.jakobi@gmail.com>

* Fix a few small things

* Fix copy-and-paste mistakes

* Use BS constructor in unsafePackAddress

Co-authored-by: Simon Jakobi <simon.jakobi@gmail.com>
  • Loading branch information
andrewthad and sjakobi committed Aug 25, 2020
1 parent 75aa5b0 commit 371f224
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 2 deletions.
5 changes: 5 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
[0.11.0.0]
* [Add `indexMaybe` and synonym `(!?)` for indexing that returns `Maybe`](https://github.com/haskell/bytestring/pull/261)
* Add `unsafePackLiteral` to `Data.ByteString.Internal`. When possible, use the
[known-key](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/wired-in)
variant of C's `strlen` from `GHC.CString` that supports constant folding.
This results in better generated code when an ASCII-only string literal
is desugared to `ByteString` by the `OverloadedStrings` extension. ([#191])

[0.11.0.0]: https://github.com/haskell/bytestring/compare/0.10.12.0...0.11.0.0

Expand Down
1 change: 1 addition & 0 deletions Data/ByteString.hs
Original file line number Diff line number Diff line change
Expand Up @@ -759,6 +759,7 @@ replicate w c
| w <= 0 = empty
| otherwise = unsafeCreate w $ \ptr ->
memset ptr c (fromIntegral w) >> return ()
{-# INLINE replicate #-}

-- | /O(n)/, where /n/ is the length of the result. The 'unfoldr'
-- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a
Expand Down
29 changes: 27 additions & 2 deletions Data/ByteString/Internal.hs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ module Data.ByteString.Internal (
packChars, packUptoLenChars, unsafePackLenChars,
unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict,
unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict,
unsafePackAddress,
unsafePackAddress, unsafePackLiteral,

-- * Low level imperative construction
create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString
Expand Down Expand Up @@ -161,6 +161,11 @@ import GHC.Types (Int (..))
import GHC.Prim (plusAddr#)
#endif

#if __GLASGOW_HASKELL__ >= 811
import GHC.CString (cstringLength#)
import GHC.ForeignPtr (ForeignPtrContents(FinalPtr))
#endif

import GHC.Ptr (Ptr(..), castPtr)

-- CFILES stuff is Hugs only
Expand Down Expand Up @@ -260,6 +265,7 @@ instance IsList ByteString where
-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ByteString where
-- NOTE(review): presumably inlined so that the
-- "ByteString packChars/packAddress" rewrite rule can match
-- @packChars (unpackCString# s)@ at the use site of a literal -- confirm.
{-# INLINE fromString #-}
fromString = packChars

instance Data ByteString where
Expand All @@ -281,7 +287,7 @@ packChars cs = unsafePackLenChars (List.length cs) cs

{-# RULES
"ByteString packChars/packAddress" forall s .
packChars (unpackCString# s) = accursedUnutterablePerformIO (unsafePackAddress s)
packChars (unpackCString# s) = unsafePackLiteral s
#-}

unsafePackLenBytes :: Int -> [Word8] -> ByteString
Expand Down Expand Up @@ -322,14 +328,33 @@ unsafePackLenChars len cs0 =
--
unsafePackAddress :: Addr# -> IO ByteString
#if __GLASGOW_HASKELL__ >= 811
unsafePackAddress addr# =
    -- Wrap the raw address directly with the 'FinalPtr' contents
    -- constructor and take the length from 'cstringLength#'.
    return $ BS (ForeignPtr addr# FinalPtr) (I# (cstringLength# addr#))
#else
unsafePackAddress addr# = do
    -- Older compilers: view the address as a C string, wrap it with
    -- 'newForeignPtr_', and measure it with 'c_strlen'.
    let literal = Ptr addr# :: CString
    fp  <- newForeignPtr_ (castPtr literal)
    len <- c_strlen literal
    return (BS fp (fromIntegral len))
#endif
{-# INLINE unsafePackAddress #-}

-- | Pure counterpart of 'unsafePackAddress'; see that function for the
-- general behaviour. Prefer this function when the address is known to be
-- an @Addr#@ literal: in that context there is no need for the sequencing
-- guarantees that 'IO' provides.
--
-- The length is obtained from @cstringLength#@ when
-- @__GLASGOW_HASKELL__ >= 811@ (GHC 9.0 and up) and from @c_strlen@
-- otherwise. On GHC 9.0 and up the result is built with the @FinalPtr@
-- data constructor of @ForeignPtrContents@.
unsafePackLiteral :: Addr# -> ByteString
#if __GLASGOW_HASKELL__ >= 811
unsafePackLiteral addr# =
    BS (ForeignPtr addr# FinalPtr) (I# (cstringLength# addr#))
#else
unsafePackLiteral addr# = BS fp (fromIntegral len)
  where
    len = accursedUnutterablePerformIO (c_strlen (Ptr addr#))
    -- NOTE(review): 'newForeignPtr_' is run under
    -- 'accursedUnutterablePerformIO'; if it allocates mutable state
    -- internally, that state could end up shared after optimisation --
    -- confirm whether a safer primitive is needed here.
    fp  = accursedUnutterablePerformIO (newForeignPtr_ (Ptr addr#))
#endif
{-# INLINE unsafePackLiteral #-}


packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8])
packUptoLenBytes len xs0 =
Expand Down

0 comments on commit 371f224

Please sign in to comment.