Skip to content

Commit

Permalink
Switch Data.ByteString.Short to reuse Data.Array.Byte
Browse files Browse the repository at this point in the history
  • Loading branch information
Bodigrim committed Oct 7, 2022
1 parent dcfe741 commit 0a4f471
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 79 deletions.
2 changes: 2 additions & 0 deletions Data/ByteString/Short.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE Trustworthy #-}

-- |
Expand Down Expand Up @@ -28,6 +29,7 @@ module Data.ByteString.Short (
-- * The @ShortByteString@ type

ShortByteString(..),
pattern SBS,

-- ** Memory overhead
-- | With GHC, the memory overheads are as follows, expressed in words and
Expand Down
96 changes: 17 additions & 79 deletions Data/ByteString/Short/Internal.hs
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveLift #-}
{-# LANGUAGE ForeignFunctionInterface #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TemplateHaskellQuotes #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UnboxedTuples #-}
{-# LANGUAGE UnliftedFFITypes #-}
{-# LANGUAGE UndecidableInstances #-}
{-# LANGUAGE Unsafe #-}

{-# OPTIONS_HADDOCK not-home #-}

{-# OPTIONS_GHC -fno-warn-name-shadowing -fexpose-all-unfoldings #-}
Expand Down Expand Up @@ -41,6 +46,7 @@ module Data.ByteString.Short.Internal (

-- * The @ShortByteString@ type and representation
ShortByteString(..),
pattern SBS,

-- * Introducing and eliminating 'ShortByteString's
empty,
Expand Down Expand Up @@ -162,6 +168,8 @@ import Data.ByteString.Internal
, checkedAdd
)

import Data.Array.Byte
( ByteArray(..) )
import Data.Bits
( FiniteBits (finiteBitSize)
, shiftL
Expand All @@ -172,21 +180,17 @@ import Data.Bits
, (.|.)
)
import Data.Data
( Data(..)
, mkNoRepType
)
( Data(..) )
import Data.Monoid
( Monoid(..) )
import Data.Semigroup
( Semigroup((<>)) )
( Semigroup )
import Data.String
( IsString(..) )
import Data.Typeable
( Typeable )
import Control.Applicative
( pure )
import Control.DeepSeq
( NFData(..) )
( NFData )
import Control.Exception
( assert )
import Control.Monad
Expand Down Expand Up @@ -269,7 +273,6 @@ import qualified Data.ByteString.Internal as BS

import qualified Data.List as List
import qualified GHC.Exts
import qualified Language.Haskell.TH.Lib as TH
import qualified Language.Haskell.TH.Syntax as TH

-- | A compact representation of a 'Word8' vector.
Expand All @@ -279,77 +282,28 @@ import qualified Language.Haskell.TH.Syntax as TH
-- 'ByteString' (at the cost of copying the string data). It supports very few
-- other operations.
--
data ShortByteString = SBS ByteArray#
deriving Typeable

-- | @since 0.11.2.0
instance TH.Lift ShortByteString where
#if MIN_VERSION_template_haskell(2,16,0)
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.BytesPrimL $ TH.Bytes ptr 0 (fromIntegral len))
where
BS ptr len = fromShort sbs
#else
lift sbs = [| unsafePackLenLiteral |]
`TH.appE` TH.litE (TH.integerL (fromIntegral len))
`TH.appE` TH.litE (TH.StringPrimL $ BS.unpackBytes bs)
where
bs@(BS _ len) = fromShort sbs
#endif

#if MIN_VERSION_template_haskell(2,17,0)
liftTyped = TH.unsafeCodeCoerce . TH.lift
#elif MIN_VERSION_template_haskell(2,16,0)
liftTyped = TH.unsafeTExpCoerce . TH.lift
#endif

-- The ByteArray# representation is always word sized and aligned but with a
-- known byte length. Our representation choice for ShortByteString is to leave
-- the 0--3 trailing bytes undefined. This means we can use word-sized writes,
-- but we have to be careful with reads, see equateBytes and compareBytes below.

newtype ShortByteString = ShortByteString { unShortByteString :: ByteArray }
deriving (Eq, Semigroup, Monoid, TH.Lift, GHC.Exts.IsList, Data, NFData)

instance Eq ShortByteString where
(==) = equateBytes
pattern SBS :: ByteArray# -> ShortByteString
pattern SBS x = ShortByteString (ByteArray x)
{-# COMPLETE SBS #-}

-- | Lexicographic order.
instance Ord ShortByteString where
compare = compareBytes

instance Semigroup ShortByteString where
(<>) = append

instance Monoid ShortByteString where
mempty = empty
mappend = (<>)
mconcat = concat

instance NFData ShortByteString where
rnf SBS{} = ()

instance Show ShortByteString where
showsPrec p ps r = showsPrec p (unpackChars ps) r

instance Read ShortByteString where
readsPrec p str = [ (packChars x, y) | (x, y) <- readsPrec p str ]

-- | @since 0.10.12.0
instance GHC.Exts.IsList ShortByteString where
type Item ShortByteString = Word8
fromList = packBytes
toList = unpack

-- | Beware: 'fromString' truncates multi-byte characters to octets.
-- e.g. "枯朶に烏のとまりけり秋の暮" becomes �6k�nh~�Q��n�
instance IsString ShortByteString where
fromString = packChars

instance Data ShortByteString where
gfoldl f z txt = z packBytes `f` unpack txt
toConstr _ = error "Data.ByteString.Short.ShortByteString.toConstr"
gunfold _ _ = error "Data.ByteString.Short.ShortByteString.gunfold"
dataTypeOf _ = mkNoRepType "Data.ByteString.Short.ShortByteString"

------------------------------------------------------------------------
-- Simple operations

Expand Down Expand Up @@ -402,12 +356,6 @@ indexError sbs i =
moduleError "index" $ "error in array index: " ++ show i
++ " not in range [0.." ++ show (length sbs) ++ "]"

-- | @since 0.11.2.0
unsafePackLenLiteral :: Int -> Addr# -> ShortByteString
unsafePackLenLiteral len addr# =
-- createFromPtr allocates, so accursedUnutterablePerformIO is wrong
unsafeDupablePerformIO $ createFromPtr (Ptr addr#) len

------------------------------------------------------------------------
-- Internal utils

Expand Down Expand Up @@ -642,13 +590,6 @@ unpackAppendBytesStrict !sbs off len = go (off-1) (off-1 + len)
------------------------------------------------------------------------
-- Eq and Ord implementations

equateBytes :: ShortByteString -> ShortByteString -> Bool
equateBytes sbs1 sbs2 =
let !len1 = length sbs1
!len2 = length sbs2
in len1 == len2
&& 0 == compareByteArrays (asBA sbs1) (asBA sbs2) len1

compareBytes :: ShortByteString -> ShortByteString -> Ordering
compareBytes sbs1 sbs2 =
let !len1 = length sbs1
Expand All @@ -661,7 +602,6 @@ compareBytes sbs1 sbs2 =
| len2 < len1 -> GT
| otherwise -> EQ


------------------------------------------------------------------------
-- Appending and concatenation

Expand Down Expand Up @@ -1597,8 +1537,6 @@ findIndices k = \sbs ->
| otherwise = go (n + 1)
in go 0



------------------------------------------------------------------------
-- Exported low level operations

Expand Down
3 changes: 3 additions & 0 deletions bytestring.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ source-repository head
library
build-depends: base >= 4.9 && < 5, ghc-prim, deepseq, template-haskell

if impl(ghc < 9.4)
build-depends: data-array-byte >= 0.1 && < 0.2

exposed-modules: Data.ByteString
Data.ByteString.Char8
Data.ByteString.Unsafe
Expand Down

0 comments on commit 0a4f471

Please sign in to comment.