From 64782d1cf452afb53640ca8ae04091e8015e3e84 Mon Sep 17 00:00:00 2001 From: Bodigrim Date: Sat, 7 Aug 2021 23:49:38 +0100 Subject: [PATCH] Switch Data.ByteString.Short to reuse Data.Array.Byte --- .github/workflows/ci.yml | 5 +- Data/ByteString/Short/Internal.hs | 95 +++++++------------------------ bytestring.cabal | 3 + 3 files changed, 25 insertions(+), 78 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3c93fa520..5724e8849 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,10 +133,11 @@ jobs: githubToken: ${{ github.token }} install: | apt-get update -y - apt-get install -y ghc libghc-tasty-quickcheck-dev + apt-get install -y curl ghc libghc-tasty-quickcheck-dev run: | + curl -s https://hackage.haskell.org/package/data-array-byte-0.1/data-array-byte-0.1.tar.gz | tar xz ghc --version - ghc --make -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s + ghc --make -Iinclude -itests:tests/builder:data-array-byte-0.1 -o Main cbits/*.c tests/Main.hs +RTS -s ./Main +RTS -s bounds-checking: diff --git a/Data/ByteString/Short/Internal.hs b/Data/ByteString/Short/Internal.hs index 8639a3e36..5283a6731 100644 --- a/Data/ByteString/Short/Internal.hs +++ b/Data/ByteString/Short/Internal.hs @@ -1,10 +1,13 @@ {-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE DeriveDataTypeable #-} +{-# LANGUAGE DeriveLift #-} {-# LANGUAGE ForeignFunctionInterface #-} +{-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE MagicHash #-} {-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE TemplateHaskellQuotes #-} @@ -13,6 +16,7 @@ {-# LANGUAGE UnboxedTuples #-} {-# LANGUAGE UnliftedFFITypes #-} {-# LANGUAGE Unsafe #-} + {-# OPTIONS_HADDOCK not-home #-} {-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-} @@ -40,7 +44,7 @@ module Data.ByteString.Short.Internal ( -- * The @ShortByteString@ type and representation - ShortByteString(..), + ShortByteString(.., SBS), -- * Introducing and eliminating 'ShortByteString's empty, @@ -162,6 +166,8 @@ import Data.ByteString.Internal , checkedAdd ) +import Data.Array.Byte + ( ByteArray(..) ) import Data.Bits ( FiniteBits (finiteBitSize) , shiftL @@ -172,21 +178,17 @@ import Data.Bits , (.|.) ) import Data.Data - ( Data(..) - , mkNoRepType - ) + ( Data(..) ) import Data.Monoid ( Monoid(..) ) import Data.Semigroup - ( Semigroup((<>)) ) + ( Semigroup ) import Data.String ( IsString(..) ) -import Data.Typeable - ( Typeable ) import Control.Applicative ( pure ) import Control.DeepSeq - ( NFData(..) ) + ( NFData ) import Control.Exception ( assert ) import Control.Monad @@ -269,7 +271,6 @@ import qualified Data.ByteString.Internal as BS import qualified Data.List as List import qualified GHC.Exts -import qualified Language.Haskell.TH.Lib as TH import qualified Language.Haskell.TH.Syntax as TH -- | A compact representation of a 'Word8' vector. @@ -279,54 +280,17 @@ import qualified Language.Haskell.TH.Syntax as TH -- 'ByteString' (at the cost of copying the string data). It supports very few -- other operations. -- -data ShortByteString = SBS ByteArray# - deriving Typeable - --- | @since 0.11.2.0 -instance TH.Lift ShortByteString where -#if MIN_VERSION_template_haskell(2,16,0) - lift sbs = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len)) - where - BS ptr len = fromShort sbs -#else - lift sbs = [| unsafePackLenLiteral |] - `TH.appE` TH.litE (TH.integerL (fromIntegral len)) - `TH.appE` TH.litE (TH.StringPrimL $ BS.unpackBytes bs) - where - bs@(BS _ len) = fromShort sbs -#endif - -#if MIN_VERSION_template_haskell(2,17,0) - liftTyped = TH.unsafeCodeCoerce . TH.lift -#elif MIN_VERSION_template_haskell(2,16,0) - liftTyped = TH.unsafeTExpCoerce . TH.lift -#endif - --- The ByteArray# representation is always word sized and aligned but with a --- known byte length. Our representation choice for ShortByteString is to leave --- the 0--3 trailing bytes undefined. This means we can use word-sized writes, --- but we have to be careful with reads, see equateBytes and compareBytes below. - +newtype ShortByteString = ShortByteString { unShortByteString :: ByteArray } + deriving (Eq, Semigroup, Monoid, TH.Lift, Data, NFData) -instance Eq ShortByteString where - (==) = equateBytes +pattern SBS :: ByteArray# -> ShortByteString +pattern SBS x = ShortByteString (ByteArray x) +{-# COMPLETE SBS #-} +-- | Lexicographic order. instance Ord ShortByteString where compare = compareBytes -instance Semigroup ShortByteString where - (<>) = append - -instance Monoid ShortByteString where - mempty = empty - mappend = (<>) - mconcat = concat - -instance NFData ShortByteString where - rnf SBS{} = () - instance Show ShortByteString where showsPrec p ps r = showsPrec p (unpackChars ps) r @@ -336,20 +300,15 @@ instance Read ShortByteString where -- | @since 0.10.12.0 instance GHC.Exts.IsList ShortByteString where type Item ShortByteString = Word8 - fromList = packBytes - toList = unpack + fromList = ShortByteString . GHC.Exts.fromList + fromListN = (ShortByteString .) . GHC.Exts.fromListN + toList = GHC.Exts.toList . unShortByteString -- | Beware: 'fromString' truncates multi-byte characters to octets. -- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n� instance IsString ShortByteString where fromString = packChars -instance Data ShortByteString where - gfoldl f z txt = z packBytes `f` unpack txt - toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr" - gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold" - dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString" - ------------------------------------------------------------------------ -- Simple operations @@ -402,12 +361,6 @@ indexError sbs i = moduleError "index" $ "error in array index: " ++ show i ++ " not in range [0.." ++ show (length sbs) ++ "]" --- | @since 0.11.2.0 -unsafePackLenLiteral :: Int -> Addr# -> ShortByteString -unsafePackLenLiteral len addr# = - -- createFromPtr allocates, so accursedUnutterablePerformIO is wrong - unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len - ------------------------------------------------------------------------ -- Internal utils @@ -642,13 +595,6 @@ unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len) ------------------------------------------------------------------------ -- Eq and Ord implementations -equateBytes :: ShortByteString -> ShortByteString -> Bool -equateBytes sbs1 sbs2 = - let !len1 = length sbs1 - !len2 = length sbs2 - in len1 == len2 - && 0 == compareByteArrays (asBA sbs1) (asBA sbs2) len1 - compareBytes :: ShortByteString -> ShortByteString -> Ordering compareBytes sbs1 sbs2 = let !len1 = length sbs1 @@ -661,7 +607,6 @@ compareBytes sbs1 sbs2 = | len2 < len1 -> GT | otherwise -> EQ - ------------------------------------------------------------------------ -- Appending and concatenation @@ -1597,8 +1542,6 @@ findIndices k = \sbs -> | otherwise = go (n + 1) in go 0 - - ------------------------------------------------------------------------ -- Exported low level operations diff --git a/bytestring.cabal b/bytestring.cabal index fd5656fc4..bd47cfc9a 100644 --- a/bytestring.cabal +++ b/bytestring.cabal @@ -74,6 +74,9 @@ source-repository head library build-depends: base >= 4.9 && < 5, ghc-prim, deepseq, template-haskell + if impl(ghc < 9.4) + build-depends: data-array-byte >= 0.1 && < 0.2 + exposed-modules: Data.ByteString Data.ByteString.Char8 Data.ByteString.Unsafe