From 91a727c2e52b13e4ad259293ed0024531ce48d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=CB=8Cbod=CA=B2=C9=AA=CB=88=C9=A1r=CA=B2im?= Date: Sun, 9 Oct 2022 22:28:50 +0100 Subject: [PATCH] Switch Data.ByteString.Short to ByteArray (#410) * Switch Data.ByteString.Short to reuse Data.Array.Byte * Improve documentation * Update changelog * Restore our own instances for Semigroup and Monoid, they are safer w.r.t. overflows --- .github/workflows/ci.yml | 5 +- Changelog.md | 2 + Data/ByteString/Short/Internal.hs | 103 ++++++++++-------------------- bytestring.cabal | 3 + 4 files changed, 42 insertions(+), 71 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c93fa520..5724e8849 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,10 +133,11 @@ jobs: githubToken: ${{ github.token }} install: | apt-get update -y - apt-get install -y ghc libghc-tasty-quickcheck-dev + apt-get install -y curl ghc libghc-tasty-quickcheck-dev run: | + curl -s https://hackage.haskell.org/package/data-array-byte-0.1/data-array-byte-0.1.tar.gz | tar xz ghc --version - ghc --make -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s + ghc --make -Iinclude -itests:tests/builder:data-array-byte-0.1 -o Main cbits/*.c tests/Main.hs +RTS -s ./Main +RTS -s bounds-checking: diff --git a/Changelog.md b/Changelog.md index fd24dbe85..78e3b7a66 100644 --- a/Changelog.md +++ b/Changelog.md @@ -4,6 +4,8 @@ * [`readInt` returns `Nothing`, if the sequence of digits cannot be represented by an `Int`, instead of overflowing silently](https://github.com/haskell/bytestring/pull/309) * [Remove `zipWith` rewrite rule](https://github.com/haskell/bytestring/pull/387) * [Export `unsafeIndex` for ShortByteString which had been accidentally removed in v0.11.3.0](https://github.com/haskell/bytestring/pull/532) +* [`ShortByteString` is now a wrapper over boxed `Data.Array.Byte.ByteArray` instead of unboxed `ByteArray#` directly](https://github.com/haskell/bytestring/pull/410) +* [`fromListN` from `instance IsList ShortByteString` throws an exception if the first argument does not match the length of the second instead of silent ignore](https://github.com/haskell/bytestring/pull/410) [0.12.0.0]: https://github.com/haskell/bytestring/compare/0.11.3.0...0.12.0.0 diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 8639a3e36..c29582e11 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -1,10 +1,13 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveLift #-} {-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskellQuotes #-} @@ -13,6 +16,7 @@ {-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE UnliftedFFITypes #-} {-# LANGUAGE Unsafe #-} + {-# OPTIONS_HADDOCK not-home #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} @@ -40,7 +44,7 @@ module Data.ByteString.Short.Internal ( -- * The @ShortByteString@ type and representation - ShortByteString(..), + ShortByteString(.., SBS), -- * Introducing and eliminating 'ShortByteString's empty, @@ -162,6 +166,8 @@ import Data.ByteString.Internal , checkedAdd ) +import Data.Array.Byte + ( ByteArray(..) ) import Data.Bits ( FiniteBits (finiteBitSize) , shiftL @@ -172,21 +178,17 @@ import Data.Bits , (.|.) ) import Data.Data - ( Data(..) - , mkNoRepType - ) + ( Data(..) ) import Data.Monoid ( Monoid(..) ) import Data.Semigroup ( Semigroup((<>)) ) import Data.String ( IsString(..) ) -import Data.Typeable - ( Typeable ) import Control.Applicative ( pure ) import Control.DeepSeq - ( NFData(..) ) + ( NFData ) import Control.Exception ( assert ) import Control.Monad @@ -269,7 +271,6 @@ import qualified Data.ByteString.Internal as BS import qualified Data.List as List import qualified GHC.Exts -import qualified Language.Haskell.TH.Lib as TH import qualified Language.Haskell.TH.Syntax as TH -- | A compact representation of a 'Word8' vector. @@ -279,43 +280,31 @@ import qualified Language.Haskell.TH.Syntax as TH -- 'ByteString' (at the cost of copying the string data). It supports very few -- other operations. -- -data ShortByteString = SBS ByteArray# - deriving Typeable - --- | @since 0.11.2.0 -instance TH.Lift ShortByteString where -#if MIN_VERSION_template_haskell(2,16,0) - lift sbs = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len)) - where - BS ptr len = fromShort sbs -#else - lift sbs = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.StringPrimL $ BS.unpackBytes bs) - where - bs@(BS _ len) = fromShort sbs -#endif - -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - --- The ByteArray# representation is always word sized and aligned but with a --- known byte length. Our representation choice for ShortByteString is to leave --- the 0--3 trailing bytes undefined. This means we can use word-sized writes, --- but we have to be careful with reads, see equateBytes and compareBytes below. - - -instance Eq ShortByteString where - (==) = equateBytes - +newtype ShortByteString = + -- | @since 0.12.0.0 + ShortByteString + { unShortByteString :: ByteArray + -- ^ @since 0.12.0.0 + } + deriving (Eq, TH.Lift, Data, NFData) + +-- | Prior to @bytestring-0.12@ 'SBS' was a genuine constructor of 'ShortByteString', +-- but now it is a bundled pattern synonym, provided as a compatibility shim. +pattern SBS :: ByteArray# -> ShortByteString +pattern SBS x = ShortByteString (ByteArray x) +{-# COMPLETE SBS #-} + +-- | Lexicographic order. instance Ord ShortByteString where compare = compareBytes +-- Instead of deriving Semigroup / Monoid , we stick to our own implementations +-- of mappend / mconcat, because they are safer with regards to overflows +-- (see prop_32bitOverflow_Short_mconcat test). +-- ByteArray is likely to catch up starting from GHC 9.6: +-- * https://gitlab.haskell.org/ghc/ghc/-/merge_requests/8272 +-- * https://gitlab.haskell.org/ghc/ghc/-/merge_requests/9128 + instance Semigroup ShortByteString where (<>) = append @@ -324,9 +313,6 @@ instance Monoid ShortByteString where mappend = (<>) mconcat = concat -instance NFData ShortByteString where - rnf SBS{} = () - instance Show ShortByteString where showsPrec p ps r = showsPrec p (unpackChars ps) r @@ -336,20 +322,15 @@ instance Read ShortByteString where -- | @since 0.10.12.0 instance GHC.Exts.IsList ShortByteString where type Item ShortByteString = Word8 - fromList = packBytes - toList = unpack + fromList = ShortByteString . GHC.Exts.fromList + fromListN = (ShortByteString .) . GHC.Exts.fromListN + toList = GHC.Exts.toList . unShortByteString -- | Beware: 'fromString' truncates multi-byte characters to octets. -- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� instance IsString ShortByteString where fromString = packChars -instance Data ShortByteString where - gfoldl f z txt = z packBytes `f` unpack txt - toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr" - gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold" - dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString" - ------------------------------------------------------------------------ -- Simple operations @@ -402,12 +383,6 @@ indexError sbs i = moduleError "index" $ "error in array index: " ++ show i ++ " not in range [0.." ++ show (length sbs) ++ "]" --- | @since 0.11.2.0 -unsafePackLenLiteral :: Int -> Addr# -> ShortByteString -unsafePackLenLiteral len addr# = - -- createFromPtr allocates, so accursedUnutterablePerformIO is wrong - unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len - ------------------------------------------------------------------------ -- Internal utils @@ -642,13 +617,6 @@ unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len) ------------------------------------------------------------------------ -- Eq and Ord implementations -equateBytes :: ShortByteString -> ShortByteString -> Bool -equateBytes sbs1 sbs2 = - let !len1 = length sbs1 - !len2 = length sbs2 - in len1 == len2 - && 0 == compareByteArrays (asBA sbs1) (asBA sbs2) len1 - compareBytes :: ShortByteString -> ShortByteString -> Ordering compareBytes sbs1 sbs2 = let !len1 = length sbs1 @@ -661,7 +629,6 @@ compareBytes sbs1 sbs2 = | len2 < len1 -> GT | otherwise -> EQ - ------------------------------------------------------------------------ -- Appending and concatenation @@ -1597,8 +1564,6 @@ findIndices k = \sbs -> | otherwise = go (n + 1) in go 0 - - ------------------------------------------------------------------------ -- Exported low level operations diff --git a/bytestring.cabal b/bytestring.cabal index fd5656fc4..bd47cfc9a 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -74,6 +74,9 @@ source-repository head library build-depends: base >= 4.9 && < 5, ghc-prim, deepseq, template-haskell + if impl(ghc < 9.4) + build-depends: data-array-byte >= 0.1 && < 0.2 + exposed-modules: Data.ByteString Data.ByteString.Char8 Data.ByteString.Unsafe