Skip to content

Commit

Permalink
Switch Data.ByteString.Short to ByteArray (#410)
Browse files Browse the repository at this point in the history
* Switch Data.ByteString.Short to reuse Data.Array.Byte

* Improve documentation

* Update changelog

* Restore our own instances for Semigroup and Monoid, they are safer w.r.t. overflows
  • Loading branch information
Bodigrim committed Oct 9, 2022
1 parent dcfe741 commit 91a727c
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 71 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Expand Up @@ -133,10 +133,11 @@ jobs:
githubToken: ${{ github.token }}
install: |
apt-get update -y
apt-get install -y ghc libghc-tasty-quickcheck-dev
apt-get install -y curl ghc libghc-tasty-quickcheck-dev
run: |
curl -s https://hackage.haskell.org/package/data-array-byte-0.1/data-array-byte-0.1.tar.gz | tar xz
ghc --version
ghc --make -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s
ghc --make -Iinclude -itests:tests/builder:data-array-byte-0.1 -o Main cbits/*.c tests/Main.hs +RTS -s
./Main +RTS -s
bounds-checking:
Expand Down
2 changes: 2 additions & 0 deletions Changelog.md
Expand Up @@ -4,6 +4,8 @@
* [`readInt` returns `Nothing`, if the sequence of digits cannot be represented by an `Int`, instead of overflowing silently](https://github.com/haskell/bytestring/pull/309)
* [Remove `zipWith` rewrite rule](https://github.com/haskell/bytestring/pull/387)
* [Export `unsafeIndex` for ShortByteString which had been accidentally removed in v0.11.3.0](https://github.com/haskell/bytestring/pull/532)
* [`ShortByteString` is now a wrapper over boxed `Data.Array.Byte.ByteArray` instead of unboxed `ByteArray#` directly](https://github.com/haskell/bytestring/pull/410)
* [`fromListN` from `instance IsList ShortByteString` throws an exception if the first argument does not match the length of the second instead of silent ignore](https://github.com/haskell/bytestring/pull/410)


[0.12.0.0]: https://github.com/haskell/bytestring/compare/0.11.3.0...0.12.0.0
Expand Down
103 changes: 34 additions & 69 deletions Data/ByteString/Short/Internal.hs
@@ -1,10 +1,13 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveLift #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
Expand All @@ -13,6 +16,7 @@
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}

{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
Expand Down Expand Up @@ -40,7 +44,7 @@
module Data.ByteString.Short.Internal (

-- * The @ShortByteString@ type and representation
ShortByteString(..),
ShortByteString(.., SBS),

-- * Introducing and eliminating 'ShortByteString's
empty,
Expand Down Expand Up @@ -162,6 +166,8 @@ import Data.ByteString.Internal
, checkedAdd
)

import Data.Array.Byte
( ByteArray(..) )
import Data.Bits
( FiniteBits (finiteBitSize)
, shiftL
Expand All @@ -172,21 +178,17 @@ import Data.Bits
, (.|.)
)
import Data.Data
( Data(..)
, mkNoRepType
)
( Data(..) )
import Data.Monoid
( Monoid(..) )
import Data.Semigroup
( Semigroup((<>)) )
import Data.String
( IsString(..) )
import Data.Typeable
( Typeable )
import Control.Applicative
( pure )
import Control.DeepSeq
( NFData(..) )
( NFData )
import Control.Exception
( assert )
import Control.Monad
Expand Down Expand Up @@ -269,7 +271,6 @@ import qualified Data.ByteString.Internal as BS

import qualified Data.List as List
import qualified GHC.Exts
import qualified Language.Haskell.TH.Lib as TH
import qualified Language.Haskell.TH.Syntax as TH

-- | A compact representation of a 'Word8' vector.
Expand All @@ -279,43 +280,31 @@ import qualified Language.Haskell.TH.Syntax as TH
-- 'ByteString' (at the cost of copying the string data). It supports very few
-- other operations.
--
data ShortByteString = SBS ByteArray#
deriving Typeable

-- | @since 0.11.2.0
instance TH.Lift ShortByteString where
#if MIN_VERSION_template_haskell(2,16,0)
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len))
where
BS ptr len = fromShort sbs
#else
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.StringPrimL $ BS.unpackBytes bs)
where
bs@(BS _ len) = fromShort sbs
#endif

#if MIN_VERSION_template_haskell(2,17,0)
liftTyped = TH.unsafeCodeCoerce . TH.lift
#elif MIN_VERSION_template_haskell(2,16,0)
liftTyped = TH.unsafeTExpCoerce . TH.lift
#endif

-- The ByteArray# representation is always word sized and aligned but with a
-- known byte length. Our representation choice for ShortByteString is to leave
-- the 0--3 trailing bytes undefined. This means we can use word-sized writes,
-- but we have to be careful with reads, see equateBytes and compareBytes below.


instance Eq ShortByteString where
(==) = equateBytes

newtype ShortByteString =
-- | @since 0.12.0.0
ShortByteString
{ unShortByteString :: ByteArray
-- ^ @since 0.12.0.0
}
deriving (Eq, TH.Lift, Data, NFData)

-- | Prior to @bytestring-0.12@ 'SBS' was a genuine constructor of 'ShortByteString',
-- but now it is a bundled pattern synonym, provided as a compatibility shim.
pattern SBS :: ByteArray# -> ShortByteString
pattern SBS x = ShortByteString (ByteArray x)
{-# COMPLETE SBS #-}

-- | Lexicographic order.
instance Ord ShortByteString where
compare = compareBytes

-- Instead of deriving Semigroup / Monoid , we stick to our own implementations
-- of mappend / mconcat, because they are safer with regards to overflows
-- (see prop_32bitOverflow_Short_mconcat test).
-- ByteArray is likely to catch up starting from GHC 9.6:
-- * https://gitlab.haskell.org/ghc/ghc/-/merge_requests/8272
-- * https://gitlab.haskell.org/ghc/ghc/-/merge_requests/9128

instance Semigroup ShortByteString where
(<>) = append

Expand All @@ -324,9 +313,6 @@ instance Monoid ShortByteString where
mappend = (<>)
mconcat = concat

instance NFData ShortByteString where
rnf SBS{} = ()

instance Show ShortByteString where
showsPrec p ps r = showsPrec p (unpackChars ps) r

Expand All @@ -336,20 +322,15 @@ instance Read ShortByteString where
-- | @since 0.10.12.0
instance GHC.Exts.IsList ShortByteString where
type Item ShortByteString = Word8
fromList = packBytes
toList = unpack
fromList = ShortByteString . GHC.Exts.fromList
fromListN = (ShortByteString .) . GHC.Exts.fromListN
toList = GHC.Exts.toList . unShortByteString

-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ShortByteString where
fromString = packChars

instance Data ShortByteString where
gfoldl f z txt = z packBytes `f` unpack txt
toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr"
gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString"

------------------------------------------------------------------------
-- Simple operations

Expand Down Expand Up @@ -402,12 +383,6 @@ indexError sbs i =
moduleError "index" $ "error in array index: " ++ show i
++ " not in range [0.." ++ show (length sbs) ++ "]"

-- | @since 0.11.2.0
unsafePackLenLiteral :: Int -> Addr# -> ShortByteString
unsafePackLenLiteral len addr# =
-- createFromPtr allocates, so accursedUnutterablePerformIO is wrong
unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len

------------------------------------------------------------------------
-- Internal utils

Expand Down Expand Up @@ -642,13 +617,6 @@ unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len)
------------------------------------------------------------------------
-- Eq and Ord implementations

equateBytes :: ShortByteString -> ShortByteString -> Bool
equateBytes sbs1 sbs2 =
let !len1 = length sbs1
!len2 = length sbs2
in len1 == len2
&& 0 == compareByteArrays (asBA sbs1) (asBA sbs2) len1

compareBytes :: ShortByteString -> ShortByteString -> Ordering
compareBytes sbs1 sbs2 =
let !len1 = length sbs1
Expand All @@ -661,7 +629,6 @@ compareBytes sbs1 sbs2 =
| len2 < len1 -> GT
| otherwise -> EQ


------------------------------------------------------------------------
-- Appending and concatenation

Expand Down Expand Up @@ -1597,8 +1564,6 @@ findIndices k = \sbs ->
| otherwise = go (n + 1)
in go 0



------------------------------------------------------------------------
-- Exported low level operations

Expand Down
3 changes: 3 additions & 0 deletions bytestring.cabal
Expand Up @@ -74,6 +74,9 @@ source-repository head
library
build-depends: base >= 4.9 && < 5, ghc-prim, deepseq, template-haskell

if impl(ghc < 9.4)
build-depends: data-array-byte >= 0.1 && < 0.2

exposed-modules: Data.ByteString
Data.ByteString.Char8
Data.ByteString.Unsafe
Expand Down

0 comments on commit 91a727c

Please sign in to comment.