Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch Data.ByteString.Short to ByteArray #410

Merged
merged 4 commits into from
Oct 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,11 @@ jobs:
githubToken: ${{ github.token }}
install: |
apt-get update -y
apt-get install -y ghc libghc-tasty-quickcheck-dev
apt-get install -y curl ghc libghc-tasty-quickcheck-dev
run: |
curl -s https://hackage.haskell.org/package/data-array-byte-0.1/data-array-byte-0.1.tar.gz | tar xz
ghc --version
ghc --make -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s
ghc --make -Iinclude -itests:tests/builder:data-array-byte-0.1 -o Main cbits/*.c tests/Main.hs +RTS -s
./Main +RTS -s

bounds-checking:
Expand Down
2 changes: 2 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
* [`readInt` returns `Nothing`, if the sequence of digits cannot be represented by an `Int`, instead of overflowing silently](https://github.com/haskell/bytestring/pull/309)
* [Remove `zipWith` rewrite rule](https://github.com/haskell/bytestring/pull/387)
* [Export `unsafeIndex` for ShortByteString which had been accidentally removed in v0.11.3.0](https://github.com/haskell/bytestring/pull/532)
* [`ShortByteString` is now a wrapper over boxed `Data.Array.Byte.ByteArray` instead of unboxed `ByteArray#` directly](https://github.com/haskell/bytestring/pull/410)
* [`fromListN` from `instance IsList ShortByteString` throws an exception if the first argument does not match the length of the second instead of silent ignore](https://github.com/haskell/bytestring/pull/410)


[0.12.0.0]: https://github.com/haskell/bytestring/compare/0.11.3.0...0.12.0.0
Expand Down
103 changes: 34 additions & 69 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveLift #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
Expand All @@ -13,6 +16,7 @@
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}

{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
Expand Down Expand Up @@ -40,7 +44,7 @@
module Data.ByteString.Short.Internal (

-- * The @ShortByteString@ type and representation
ShortByteString(..),
ShortByteString(.., SBS),

-- * Introducing and eliminating 'ShortByteString's
empty,
Expand Down Expand Up @@ -162,6 +166,8 @@ import Data.ByteString.Internal
, checkedAdd
)

import Data.Array.Byte
( ByteArray(..) )
import Data.Bits
( FiniteBits (finiteBitSize)
, shiftL
Expand All @@ -172,21 +178,17 @@ import Data.Bits
, (.|.)
)
import Data.Data
( Data(..)
, mkNoRepType
)
( Data(..) )
import Data.Monoid
( Monoid(..) )
import Data.Semigroup
( Semigroup((<>)) )
import Data.String
( IsString(..) )
import Data.Typeable
( Typeable )
import Control.Applicative
( pure )
import Control.DeepSeq
( NFData(..) )
( NFData )
import Control.Exception
( assert )
import Control.Monad
Expand Down Expand Up @@ -269,7 +271,6 @@ import qualified Data.ByteString.Internal as BS

import qualified Data.List as List
import qualified GHC.Exts
import qualified Language.Haskell.TH.Lib as TH
import qualified Language.Haskell.TH.Syntax as TH

-- | A compact representation of a 'Word8' vector.
Expand All @@ -279,43 +280,31 @@ import qualified Language.Haskell.TH.Syntax as TH
-- 'ByteString' (at the cost of copying the string data). It supports very few
-- other operations.
--
data ShortByteString = SBS ByteArray#
deriving Typeable

-- | @since 0.11.2.0
instance TH.Lift ShortByteString where
#if MIN_VERSION_template_haskell(2,16,0)
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len))
where
BS ptr len = fromShort sbs
#else
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.StringPrimL $ BS.unpackBytes bs)
where
bs@(BS _ len) = fromShort sbs
#endif

#if MIN_VERSION_template_haskell(2,17,0)
liftTyped = TH.unsafeCodeCoerce . TH.lift
#elif MIN_VERSION_template_haskell(2,16,0)
liftTyped = TH.unsafeTExpCoerce . TH.lift
#endif

-- The ByteArray# representation is always word sized and aligned but with a
-- known byte length. Our representation choice for ShortByteString is to leave
-- the 0--3 trailing bytes undefined. This means we can use word-sized writes,
-- but we have to be careful with reads, see equateBytes and compareBytes below.


instance Eq ShortByteString where
(==) = equateBytes

newtype ShortByteString =
-- | @since 0.12.0.0
ShortByteString
{ unShortByteString :: ByteArray
-- ^ @since 0.12.0.0
}
deriving (Eq, TH.Lift, Data, NFData)

-- | Prior to @bytestring-0.12@ 'SBS' was a genuine constructor of 'ShortByteString',
-- but now it is a bundled pattern synonym, provided as a compatibility shim.
pattern SBS :: ByteArray# -> ShortByteString
pattern SBS x = ShortByteString (ByteArray x)
{-# COMPLETE SBS #-}
Bodigrim marked this conversation as resolved.
Show resolved Hide resolved

-- | Lexicographic order.
instance Ord ShortByteString where
compare = compareBytes

-- Instead of deriving Semigroup / Monoid , we stick to our own implementations
-- of mappend / mconcat, because they are safer with regards to overflows
-- (see prop_32bitOverflow_Short_mconcat test).
-- ByteArray is likely to catch up starting from GHC 9.6:
-- * https://gitlab.haskell.org/ghc/ghc/-/merge_requests/8272
-- * https://gitlab.haskell.org/ghc/ghc/-/merge_requests/9128

instance Semigroup ShortByteString where
(<>) = append

Expand All @@ -324,9 +313,6 @@ instance Monoid ShortByteString where
mappend = (<>)
mconcat = concat

instance NFData ShortByteString where
rnf SBS{} = ()

instance Show ShortByteString where
showsPrec p ps r = showsPrec p (unpackChars ps) r

Expand All @@ -336,20 +322,15 @@ instance Read ShortByteString where
-- | @since 0.10.12.0
instance GHC.Exts.IsList ShortByteString where
Bodigrim marked this conversation as resolved.
Show resolved Hide resolved
type Item ShortByteString = Word8
fromList = packBytes
toList = unpack
fromList = ShortByteString . GHC.Exts.fromList
fromListN = (ShortByteString .) . GHC.Exts.fromListN
Bodigrim marked this conversation as resolved.
Show resolved Hide resolved
toList = GHC.Exts.toList . unShortByteString

-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ShortByteString where
fromString = packChars

instance Data ShortByteString where
gfoldl f z txt = z packBytes `f` unpack txt
toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr"
gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString"

------------------------------------------------------------------------
-- Simple operations

Expand Down Expand Up @@ -402,12 +383,6 @@ indexError sbs i =
moduleError "index" $ "error in array index: " ++ show i
++ " not in range [0.." ++ show (length sbs) ++ "]"

-- | @since 0.11.2.0
unsafePackLenLiteral :: Int -> Addr# -> ShortByteString
unsafePackLenLiteral len addr# =
-- createFromPtr allocates, so accursedUnutterablePerformIO is wrong
unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len

------------------------------------------------------------------------
-- Internal utils

Expand Down Expand Up @@ -642,13 +617,6 @@ unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len)
------------------------------------------------------------------------
-- Eq and Ord implementations

equateBytes :: ShortByteString -> ShortByteString -> Bool
equateBytes sbs1 sbs2 =
let !len1 = length sbs1
!len2 = length sbs2
in len1 == len2
&& 0 == compareByteArrays (asBA sbs1) (asBA sbs2) len1

compareBytes :: ShortByteString -> ShortByteString -> Ordering
compareBytes sbs1 sbs2 =
let !len1 = length sbs1
Expand All @@ -661,7 +629,6 @@ compareBytes sbs1 sbs2 =
| len2 < len1 -> GT
| otherwise -> EQ


------------------------------------------------------------------------
-- Appending and concatenation

Expand Down Expand Up @@ -1597,8 +1564,6 @@ findIndices k = \sbs ->
| otherwise = go (n + 1)
in go 0



------------------------------------------------------------------------
-- Exported low level operations

Expand Down
3 changes: 3 additions & 0 deletions bytestring.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ source-repository head
library
build-depends: base >= 4.9 && < 5, ghc-prim, deepseq, template-haskell

if impl(ghc < 9.4)
build-depends: data-array-byte >= 0.1 && < 0.2

exposed-modules: Data.ByteString
Data.ByteString.Char8
Data.ByteString.Unsafe
Expand Down