From 371f2247623c19e073fd22b3e4d2455b7fd040a4 Mon Sep 17 00:00:00 2001 From: Andrew Martin Date: Tue, 25 Aug 2020 14:13:36 -0400 Subject: [PATCH] Compute length at compile time for literal strings (#191) * Compute length at compile time for literal strings Add unsafePackLiteral to Data.ByteString.Internal. With GHC-8.10+, use known-key variant of C `strlen` from `GHC.CString` that supports constant folding. Also in GHC 8.10, another data constructor of ForeignPtrContents becomes available: LiteralPtr. For string literals, this is now used. It saves space when there are lots of literals, and it improves opportunities for case-of-known data constructor optimizations when a function scrutinizes the length of a ByteString. * Fix a few small things. Clean up warnings * Fix aesthetic issues * Update GHC version to 8.12 Co-authored-by: Simon Jakobi * Fix a few small things * Fix copy-and-paste mistakes * Use BS constructor in unsafePackAddress Co-authored-by: Simon Jakobi --- Changelog.md | 5 +++++ Data/ByteString.hs | 1 + Data/ByteString/Internal.hs | 29 +++++++++++++++++++++++++++-- 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/Changelog.md b/Changelog.md index 2f0260257..42059835f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,5 +1,10 @@ [0.11.0.0] — * [Add `indexMaybe` and synonym `(!?)` for indexing that returns `Maybe`](https://github.com/haskell/bytestring/pull/261) + * Add `unsafePackLiteral` to `Data.ByteString.Internal`. When possible, use + [known-key](https://gitlab.haskell.org/ghc/ghc/-/wikis/commentary/compiler/wired-in) + variant of C's `strlen` from `GHC.CString` that supports constant folding. + This results in better generated code when an ASCII-only string literal + is desugared to `ByteString` by the `OverloadedStrings` extension. 
([#191]) [0.11.0.0]: https://github.com/haskell/bytestring/compare/0.10.12.0...0.11.0.0 diff --git a/Data/ByteString.hs b/Data/ByteString.hs index dbb56d148..7b1321cfa 100644 --- a/Data/ByteString.hs +++ b/Data/ByteString.hs @@ -759,6 +759,7 @@ replicate w c | w <= 0 = empty | otherwise = unsafeCreate w $ \ptr -> memset ptr c (fromIntegral w) >> return () +{-# INLINE replicate #-} -- | /O(n)/, where /n/ is the length of the result. The 'unfoldr' -- function is analogous to the List \'unfoldr\'. 'unfoldr' builds a diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs index 4f88f4415..1c4d22fca 100644 --- a/Data/ByteString/Internal.hs +++ b/Data/ByteString/Internal.hs @@ -42,7 +42,7 @@ module Data.ByteString.Internal ( packChars, packUptoLenChars, unsafePackLenChars, unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict, unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, - unsafePackAddress, + unsafePackAddress, unsafePackLiteral, -- * Low level imperative construction create, -- :: Int -> (Ptr Word8 -> IO ()) -> IO ByteString @@ -161,6 +161,11 @@ import GHC.Types (Int (..)) import GHC.Prim (plusAddr#) #endif +#if __GLASGOW_HASKELL__ >= 811 +import GHC.CString (cstringLength#) +import GHC.ForeignPtr (ForeignPtrContents(FinalPtr)) +#endif + import GHC.Ptr (Ptr(..), castPtr) -- CFILES stuff is Hugs only @@ -260,6 +265,7 @@ instance IsList ByteString where -- | Beware: 'fromString' truncates multi-byte characters to octets. -- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� instance IsString ByteString where + {-# INLINE fromString #-} fromString = packChars instance Data ByteString where @@ -281,7 +287,7 @@ packChars cs = unsafePackLenChars (List.length cs) cs {-# RULES "ByteString packChars/packAddress" forall s . 
- packChars (unpackCString# s) = accursedUnutterablePerformIO (unsafePackAddress s) + packChars (unpackCString# s) = unsafePackLiteral s #-} unsafePackLenBytes :: Int -> [Word8] -> ByteString @@ -322,14 +328,33 @@ unsafePackLenChars len cs0 = -- unsafePackAddress :: Addr# -> IO ByteString unsafePackAddress addr# = do +#if __GLASGOW_HASKELL__ >= 811 + return (BS (ForeignPtr addr# FinalPtr) (I# (cstringLength# addr#))) +#else p <- newForeignPtr_ (castPtr cstr) l <- c_strlen cstr return $ BS p (fromIntegral l) where cstr :: CString cstr = Ptr addr# +#endif {-# INLINE unsafePackAddress #-} +-- | See 'unsafePackAddress'. This function has similar behavior. Prefer +-- this function when the address is known to be an @Addr#@ literal. In +-- that context, there is no need for the sequencing guarantees that 'IO' +-- provides. On GHC 9.0 and up, this function uses the @FinalPtr@ data +-- constructor for @ForeignPtrContents@. +unsafePackLiteral :: Addr# -> ByteString +unsafePackLiteral addr# = +#if __GLASGOW_HASKELL__ >= 811 + BS (ForeignPtr addr# FinalPtr) (I# (cstringLength# addr#)) +#else + let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#)) + in BS (accursedUnutterablePerformIO (newForeignPtr_ (Ptr addr#))) (fromIntegral len) +#endif +{-# INLINE unsafePackLiteral #-} + packUptoLenBytes :: Int -> [Word8] -> (ByteString, [Word8]) packUptoLenBytes len xs0 =