Skip to content

Commit

Permalink
Switch Data.ByteString.Short to reuse Data.Array.Byte
Browse files Browse the repository at this point in the history
  • Loading branch information
Bodigrim committed Oct 7, 2022
1 parent dcfe741 commit 64782d1
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 78 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -133,10 +133,11 @@ jobs:
githubToken: ${{ github.token }}
install: |
apt-get update -y
apt-get install -y ghc libghc-tasty-quickcheck-dev
apt-get install -y curl ghc libghc-tasty-quickcheck-dev
run: |
curl -s https://hackage.haskell.org/package/data-array-byte-0.1/data-array-byte-0.1.tar.gz | tar xz
ghc --version
ghc --make -Iinclude -itests:tests/builder -o Main cbits/*.c tests/Main.hs +RTS -s
ghc --make -Iinclude -itests:tests/builder:data-array-byte-0.1 -o Main cbits/*.c tests/Main.hs +RTS -s
./Main +RTS -s
bounds-checking:
Expand Down
95 changes: 19 additions & 76 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveLift #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
Expand All @@ -13,6 +16,7 @@
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE Unsafe #-}

{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
Expand Down Expand Up @@ -40,7 +44,7 @@
module Data.ByteString.Short.Internal (

-- * The @ShortByteString@ type and representation
ShortByteString(..),
ShortByteString(.., SBS),

-- * Introducing and eliminating 'ShortByteString's
empty,
Expand Down Expand Up @@ -162,6 +166,8 @@ import Data.ByteString.Internal
, checkedAdd
)

import Data.Array.Byte
( ByteArray(..) )
import Data.Bits
( FiniteBits (finiteBitSize)
, shiftL
Expand All @@ -172,21 +178,17 @@ import Data.Bits
, (.|.)
)
import Data.Data
( Data(..)
, mkNoRepType
)
( Data(..) )
import Data.Monoid
( Monoid(..) )
import Data.Semigroup
( Semigroup((<>)) )
( Semigroup )
import Data.String
( IsString(..) )
import Data.Typeable
( Typeable )
import Control.Applicative
( pure )
import Control.DeepSeq
( NFData(..) )
( NFData )
import Control.Exception
( assert )
import Control.Monad
Expand Down Expand Up @@ -269,7 +271,6 @@ import qualified Data.ByteString.Internal as BS

import qualified Data.List as List
import qualified GHC.Exts
import qualified Language.Haskell.TH.Lib as TH
import qualified Language.Haskell.TH.Syntax as TH

-- | A compact representation of a 'Word8' vector.
Expand All @@ -279,54 +280,17 @@ import qualified Language.Haskell.TH.Syntax as TH
-- 'ByteString' (at the cost of copying the string data). It supports very few
-- other operations.
--
data ShortByteString = SBS ByteArray#
deriving Typeable

-- | @since 0.11.2.0
instance TH.Lift ShortByteString where
#if MIN_VERSION_template_haskell(2,16,0)
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len))
where
BS ptr len = fromShort sbs
#else
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.StringPrimL $ BS.unpackBytes bs)
where
bs@(BS _ len) = fromShort sbs
#endif

#if MIN_VERSION_template_haskell(2,17,0)
liftTyped = TH.unsafeCodeCoerce . TH.lift
#elif MIN_VERSION_template_haskell(2,16,0)
liftTyped = TH.unsafeTExpCoerce . TH.lift
#endif

-- The ByteArray# representation is always word sized and aligned but with a
-- known byte length. Our representation choice for ShortByteString is to leave
-- the 0--3 trailing bytes undefined. This means we can use word-sized writes,
-- but we have to be careful with reads, see equateBytes and compareBytes below.

newtype ShortByteString = ShortByteString { unShortByteString :: ByteArray }
deriving (Eq, Semigroup, Monoid, TH.Lift, Data, NFData)

instance Eq ShortByteString where
(==) = equateBytes
pattern SBS :: ByteArray# -> ShortByteString
pattern SBS x = ShortByteString (ByteArray x)
{-# COMPLETE SBS #-}

-- | Lexicographic order.
instance Ord ShortByteString where
compare = compareBytes

instance Semigroup ShortByteString where
(<>) = append

instance Monoid ShortByteString where
mempty = empty
mappend = (<>)
mconcat = concat

instance NFData ShortByteString where
rnf SBS{} = ()

instance Show ShortByteString where
showsPrec p ps r = showsPrec p (unpackChars ps) r

Expand All @@ -336,20 +300,15 @@ instance Read ShortByteString where
-- | @since 0.10.12.0
instance GHC.Exts.IsList ShortByteString where
type Item ShortByteString = Word8
fromList = packBytes
toList = unpack
fromList = ShortByteString . GHC.Exts.fromList
fromListN = (ShortByteString .) . GHC.Exts.fromListN
toList = GHC.Exts.toList . unShortByteString

-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ShortByteString where
fromString = packChars

instance Data ShortByteString where
gfoldl f z txt = z packBytes `f` unpack txt
toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr"
gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString"

------------------------------------------------------------------------
-- Simple operations

Expand Down Expand Up @@ -402,12 +361,6 @@ indexError sbs i =
moduleError "index" $ "error in array index: " ++ show i
++ " not in range [0.." ++ show (length sbs) ++ "]"

-- | @since 0.11.2.0
unsafePackLenLiteral :: Int -> Addr# -> ShortByteString
unsafePackLenLiteral len addr# =
-- createFromPtr allocates, so accursedUnutterablePerformIO is wrong
unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len

------------------------------------------------------------------------
-- Internal utils

Expand Down Expand Up @@ -642,13 +595,6 @@ unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len)
------------------------------------------------------------------------
-- Eq and Ord implementations

equateBytes :: ShortByteString -> ShortByteString -> Bool
equateBytes sbs1 sbs2 =
let !len1 = length sbs1
!len2 = length sbs2
in len1 == len2
&& 0 == compareByteArrays (asBA sbs1) (asBA sbs2) len1

compareBytes :: ShortByteString -> ShortByteString -> Ordering
compareBytes sbs1 sbs2 =
let !len1 = length sbs1
Expand All @@ -661,7 +607,6 @@ compareBytes sbs1 sbs2 =
| len2 < len1 -> GT
| otherwise -> EQ


------------------------------------------------------------------------
-- Appending and concatenation

Expand Down Expand Up @@ -1597,8 +1542,6 @@ findIndices k = \sbs ->
| otherwise = go (n + 1)
in go 0



------------------------------------------------------------------------
-- Exported low level operations

Expand Down
3 changes: 3 additions & 0 deletions bytestring.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ source-repository head
library
build-depends: base >= 4.9 && < 5, ghc-prim, deepseq, template-haskell

if impl(ghc < 9.4)
build-depends: data-array-byte >= 0.1 && < 0.2

exposed-modules: Data.ByteString
Data.ByteString.Char8
Data.ByteString.Unsafe
Expand Down

0 comments on commit 64782d1

Please sign in to comment.