Skip to content

Commit

Permalink
Add Distribution.Simple.Utils.ShortText type (#3898)
Browse files Browse the repository at this point in the history
This implements a type with a compact representation of `[Char]`.

The data is stored internally as UTF8 in an 'Data.ByteString.Short.ShortByteString'
when compiled against `bytestring >= 0.10.4`, and otherwise in a
plain old `[Char]`.

`ShortByteString` is available only from `bytestring` 0.10.4 on, and GHC
7.8.4 was the first GHC to bundle `binary-0.10.4`. So this fallback
affects mostly only GHC 7.6 and older.

Note: Originally a strict `ByteString` was used as fallback for this patch. However, the 
`[Char]` fallback avoids pinned memory and may be more preferable when dealing with
many small `ShortText`s
  • Loading branch information
hvr committed Sep 28, 2016
1 parent bb2026c commit 993d20a
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 0 deletions.
113 changes: 113 additions & 0 deletions Cabal/Distribution/Simple/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE DeriveGeneric #-}

-----------------------------------------------------------------------------
-- |
-- Module : Distribution.Simple.Utils
Expand Down Expand Up @@ -150,10 +152,16 @@ module Distribution.Simple.Utils (
-- * FilePath stuff
isAbsoluteOnAnyPlatform,
isRelativeOnAnyPlatform,

-- * 'ShortText' type
ShortText,
toShortText,
fromShortText,
) where

import Prelude ()
import Distribution.Compat.Prelude
import Data.String (IsString(..))

import Distribution.Text
import Distribution.Package
Expand Down Expand Up @@ -197,6 +205,16 @@ import qualified Data.Set as Set

import qualified Data.ByteString as SBS

#if defined(MIN_VERSION_bytestring)
# if MIN_VERSION_bytestring(0,10,4)
# define HAVE_SHORTBYTESTRING 1
# endif
#endif

#if HAVE_SHORTBYTESTRING
import qualified Data.ByteString.Short as BS.Short
#endif

import System.Directory
( Permissions(executable), getDirectoryContents, getPermissions
, doesDirectoryExist, doesFileExist, removeFile, findExecutable
Expand Down Expand Up @@ -1441,6 +1459,26 @@ toUTF8 (c:cs)
: toUTF8 cs
where w = ord c

-- | Variant of 'toUTF8' operating on 'Word8's directly
toUTF8BSImpl :: String -> [Word8]
toUTF8BSImpl [] = []
toUTF8BSImpl (c:cs)
| c <= '\x07F' = w
: toUTF8BSImpl cs
| c <= '\x7FF' = (0xC0 .|. (w `shiftR` 6))
: (0x80 .|. (w .&. 0x3F))
: toUTF8BSImpl cs
| c <= '\xFFFF'= (0xE0 .|. (w `shiftR` 12))
: (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
: (0x80 .|. (w .&. 0x3F))
: toUTF8BSImpl cs
| otherwise = (0xf0 .|. (w `shiftR` 18))
: (0x80 .|. ((w `shiftR` 12) .&. 0x3F))
: (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
: (0x80 .|. (w .&. 0x3F))
: toUTF8BSImpl cs
where w = fromIntegral (ord c) :: Word8

-- | Whether BOM is at the beginning of the input
startsWithBOM :: String -> Bool
startsWithBOM ('\xFEFF':_) = True
Expand Down Expand Up @@ -1626,3 +1664,78 @@ isAbsoluteOnAnyPlatform _ = False
-- | @isRelativeOnAnyPlatform = not . 'isAbsoluteOnAnyPlatform'@
isRelativeOnAnyPlatform :: FilePath -> Bool
isRelativeOnAnyPlatform = not . isAbsoluteOnAnyPlatform

-- ------------------------------------------------------------
-- * 'ShortText' type
-- ------------------------------------------------------------

-- TODO: if we start using this internally for more opaque types in
-- Cabal then we will likely need to promote it to it's own module in
-- Distribution.* to avoid cycles, or just to maintain the sanity of
-- the Distribution.* vs Distribution.Simple.* distinction.

-- | Construct 'ShortText' from 'String'
toShortText :: String -> ShortText

-- | Convert 'ShortText' to 'String'
fromShortText :: ShortText -> String

-- | Compact representation of short 'Strings'
--
-- The data is stored internally as UTF8 in an
-- 'BS.Short.ShortByteString' when compiled against @bytestring >=
-- 0.10.4@, and otherwise the fallback is to use plain old non-compat
-- '[Char]'.
--
-- Note: This type is for internal uses (such as e.g. 'PackageName')
-- and shall not be exposed in Cabal's API
--
-- @since 2.0.0
#if HAVE_SHORTBYTESTRING
newtype ShortText = ST { unST :: BS.Short.ShortByteString }
deriving (Eq,Ord,Generic)

# if MIN_VERSION_binary(0,8,1)
instance Binary ShortText where
put = put . unST
get = fmap ST get
# else
instance Binary ShortText where
put = put . BS.Short.fromShort . unST
get = fmap (ST . BS.Short.toShort) get
# endif

toShortText = ST . BS.Short.pack . toUTF8BSImpl

fromShortText = fromUTF8BSImpl . BS.Short.unpack . unST
#else
newtype ShortText = ST { unST :: String }
deriving (Eq,Ord,Generic)

instance Binary ShortText where
put = put . toUTF8BSImpl . unST
get = fmap (ST . fromUTF8BSImpl) get

toShortText = ST

fromShortText = unST
#endif

instance NFData ShortText where
rnf = rnf . unST

instance Show ShortText where
show = show . fromShortText

instance Read ShortText where
readsPrec p = map (first toShortText) . readsPrec p

instance Semigroup ShortText where
ST a <> ST b = ST (mappend a b)

instance Monoid ShortText where
mempty = ST mempty
mappend = (<>)

instance IsString ShortText where
fromString = toShortText
2 changes: 2 additions & 0 deletions Cabal/changelog
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@
call site/stack of a logging output respectively (these
are only supported if Cabal is built with GHC 8.0/7.10.2
or greater, respectively).
* New `Distribution.Simple.Utils.ShortText` type for representing
short text strings compactly (#3898)

1.24.0.0 Ryan Thomas <ryan@ryant.org> March 2016
* Support GHC 8.
Expand Down
25 changes: 25 additions & 0 deletions Cabal/tests/UnitTests/Distribution/Simple/Utils.hs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ module UnitTests.Distribution.Simple.Utils
import Distribution.Simple.Utils
import Distribution.Verbosity

import Data.Monoid as Mon
import Data.IORef
import System.Directory ( doesDirectoryExist, doesFileExist
, getTemporaryDirectory
Expand All @@ -15,6 +16,9 @@ import qualified Control.Exception as Exception

import Test.Tasty
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck

import Distribution.Compat.Binary (encode, decode)

withTempFileTest :: Assertion
withTempFileTest = do
Expand Down Expand Up @@ -83,6 +87,22 @@ rawSystemStdInOutTextDecodingTest
Left err | isDoesNotExistError err -> Exception.throwIO err -- no ghc!
| otherwise -> return ()



prop_ShortTextOrd :: String -> String -> Bool
prop_ShortTextOrd a b = compare a b == compare (toShortText a) (toShortText b)

prop_ShortTextMonoid :: String -> String -> Bool
prop_ShortTextMonoid a b = Mon.mappend a b == fromShortText (mappend (toShortText a) (toShortText b))

prop_ShortTextId :: String -> Bool
prop_ShortTextId a = (fromShortText . toShortText) a == a

prop_ShortTextBinaryId :: String -> Bool
prop_ShortTextBinaryId a = (decode . encode) a' == a'
where
a' = toShortText a

tests :: [TestTree]
tests =
[ testCase "withTempFile works as expected" $
Expand All @@ -95,4 +115,9 @@ tests =
withTempDirRemovedTest
, testCase "rawSystemStdInOut reports text decoding errors" $
rawSystemStdInOutTextDecodingTest

, testProperty "ShortText Id" prop_ShortTextId
, testProperty "ShortText Ord" prop_ShortTextOrd
, testProperty "ShortText Monoid" prop_ShortTextMonoid
, testProperty "ShortText BinaryId" prop_ShortTextBinaryId
]

0 comments on commit 993d20a

Please sign in to comment.