Permalink
Browse files

Add `Distribution.Simple.Utils.ShortText` type (#3898)

This implements a type with a compact representation of `[Char]`.

The data is stored internally as UTF8 in an 'Data.ByteString.Short.ShortByteString'
when compiled against `bytestring >= 0.10.4`, and otherwise in a
plain old `[Char]`.

`ShortByteString` is available only from `bytestring` 0.10.4 on, and GHC
7.8.4 was the first GHC to bundle `binary-0.10.4`. So this fallback
affects mostly only GHC 7.6 and older.

Note: Originally a strict `ByteString` was used as fallback for this patch. However, the 
`[Char]` fallback avoids pinned memory and may be more preferable when dealing with
many small `ShortText`s
  • Loading branch information...
1 parent bb2026c commit 993d20a2e9b8fb29aefaa2c266f31177a00a5ee6 @hvr hvr committed on GitHub Sep 28, 2016
Showing with 140 additions and 0 deletions.
  1. +113 −0 Cabal/Distribution/Simple/Utils.hs
  2. +2 −0 Cabal/changelog
  3. +25 −0 Cabal/tests/UnitTests/Distribution/Simple/Utils.hs
View
113 Cabal/Distribution/Simple/Utils.hs
@@ -2,6 +2,8 @@
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE RankNTypes #-}
+{-# LANGUAGE DeriveGeneric #-}
+
-----------------------------------------------------------------------------
-- |
-- Module : Distribution.Simple.Utils
@@ -150,10 +152,16 @@ module Distribution.Simple.Utils (
-- * FilePath stuff
isAbsoluteOnAnyPlatform,
isRelativeOnAnyPlatform,
+
+ -- * 'ShortText' type
+ ShortText,
+ toShortText,
+ fromShortText,
) where
import Prelude ()
import Distribution.Compat.Prelude
+import Data.String (IsString(..))
import Distribution.Text
import Distribution.Package
@@ -197,6 +205,16 @@ import qualified Data.Set as Set
import qualified Data.ByteString as SBS
+#if defined(MIN_VERSION_bytestring)
+# if MIN_VERSION_bytestring(0,10,4)
+# define HAVE_SHORTBYTESTRING 1
+# endif
+#endif
+
+#if HAVE_SHORTBYTESTRING
+import qualified Data.ByteString.Short as BS.Short
+#endif
+
import System.Directory
( Permissions(executable), getDirectoryContents, getPermissions
, doesDirectoryExist, doesFileExist, removeFile, findExecutable
@@ -1441,6 +1459,26 @@ toUTF8 (c:cs)
: toUTF8 cs
where w = ord c
+-- | Variant of 'toUTF8' operating on 'Word8's directly
+toUTF8BSImpl :: String -> [Word8]
+toUTF8BSImpl [] = []
+toUTF8BSImpl (c:cs)
+ | c <= '\x07F' = w
+ : toUTF8BSImpl cs
+ | c <= '\x7FF' = (0xC0 .|. (w `shiftR` 6))
+ : (0x80 .|. (w .&. 0x3F))
+ : toUTF8BSImpl cs
+ | c <= '\xFFFF'= (0xE0 .|. (w `shiftR` 12))
+ : (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
+ : (0x80 .|. (w .&. 0x3F))
+ : toUTF8BSImpl cs
+ | otherwise = (0xf0 .|. (w `shiftR` 18))
+ : (0x80 .|. ((w `shiftR` 12) .&. 0x3F))
+ : (0x80 .|. ((w `shiftR` 6) .&. 0x3F))
+ : (0x80 .|. (w .&. 0x3F))
+ : toUTF8BSImpl cs
+ where w = fromIntegral (ord c) :: Word8
+
-- | Whether BOM is at the beginning of the input
startsWithBOM :: String -> Bool
startsWithBOM ('\xFEFF':_) = True
@@ -1626,3 +1664,78 @@ isAbsoluteOnAnyPlatform _ = False
-- | @isRelativeOnAnyPlatform = not . 'isAbsoluteOnAnyPlatform'@
isRelativeOnAnyPlatform :: FilePath -> Bool
isRelativeOnAnyPlatform = not . isAbsoluteOnAnyPlatform
+
+-- ------------------------------------------------------------
+-- * 'ShortText' type
+-- ------------------------------------------------------------
+
+-- TODO: if we start using this internally for more opaque types in
+-- Cabal then we will likely need to promote it to it's own module in
+-- Distribution.* to avoid cycles, or just to maintain the sanity of
+-- the Distribution.* vs Distribution.Simple.* distinction.
+
+-- | Construct 'ShortText' from 'String'
+toShortText :: String -> ShortText
+
+-- | Convert 'ShortText' to 'String'
+fromShortText :: ShortText -> String
+
+-- | Compact representation of short 'Strings'
+--
+-- The data is stored internally as UTF8 in an
+-- 'BS.Short.ShortByteString' when compiled against @bytestring >=
+-- 0.10.4@, and otherwise the fallback is to use plain old non-compat
+-- '[Char]'.
+--
+-- Note: This type is for internal uses (such as e.g. 'PackageName')
+-- and shall not be exposed in Cabal's API
+--
+-- @since 2.0.0
+#if HAVE_SHORTBYTESTRING
+newtype ShortText = ST { unST :: BS.Short.ShortByteString }
+ deriving (Eq,Ord,Generic)
+
+# if MIN_VERSION_binary(0,8,1)
+instance Binary ShortText where
+ put = put . unST
+ get = fmap ST get
+# else
+instance Binary ShortText where
+ put = put . BS.Short.fromShort . unST
+ get = fmap (ST . BS.Short.toShort) get
+# endif
+
+toShortText = ST . BS.Short.pack . toUTF8BSImpl
+
+fromShortText = fromUTF8BSImpl . BS.Short.unpack . unST
+#else
+newtype ShortText = ST { unST :: String }
+ deriving (Eq,Ord,Generic)
+
+instance Binary ShortText where
+ put = put . toUTF8BSImpl . unST
+ get = fmap (ST . fromUTF8BSImpl) get
+
+toShortText = ST
+
+fromShortText = unST
+#endif
+
+instance NFData ShortText where
+ rnf = rnf . unST
+
+instance Show ShortText where
+ show = show . fromShortText
+
+instance Read ShortText where
+ readsPrec p = map (first toShortText) . readsPrec p
+
+instance Semigroup ShortText where
+ ST a <> ST b = ST (mappend a b)
+
+instance Monoid ShortText where
+ mempty = ST mempty
+ mappend = (<>)
+
+instance IsString ShortText where
+ fromString = toShortText
View
2 Cabal/changelog
@@ -90,6 +90,8 @@
call site/stack of a logging output respectively (these
are only supported if Cabal is built with GHC 8.0/7.10.2
or greater, respectively).
+ * New `Distribution.Simple.Utils.ShortText` type for representing
+ short text strings compactly (#3898)
1.24.0.0 Ryan Thomas <ryan@ryant.org> March 2016
* Support GHC 8.
View
25 Cabal/tests/UnitTests/Distribution/Simple/Utils.hs
@@ -5,6 +5,7 @@ module UnitTests.Distribution.Simple.Utils
import Distribution.Simple.Utils
import Distribution.Verbosity
+import Data.Monoid as Mon
import Data.IORef
import System.Directory ( doesDirectoryExist, doesFileExist
, getTemporaryDirectory
@@ -15,6 +16,9 @@ import qualified Control.Exception as Exception
import Test.Tasty
import Test.Tasty.HUnit
+import Test.Tasty.QuickCheck
+
+import Distribution.Compat.Binary (encode, decode)
withTempFileTest :: Assertion
withTempFileTest = do
@@ -83,6 +87,22 @@ rawSystemStdInOutTextDecodingTest
Left err | isDoesNotExistError err -> Exception.throwIO err -- no ghc!
| otherwise -> return ()
+
+
+prop_ShortTextOrd :: String -> String -> Bool
+prop_ShortTextOrd a b = compare a b == compare (toShortText a) (toShortText b)
+
+prop_ShortTextMonoid :: String -> String -> Bool
+prop_ShortTextMonoid a b = Mon.mappend a b == fromShortText (mappend (toShortText a) (toShortText b))
+
+prop_ShortTextId :: String -> Bool
+prop_ShortTextId a = (fromShortText . toShortText) a == a
+
+prop_ShortTextBinaryId :: String -> Bool
+prop_ShortTextBinaryId a = (decode . encode) a' == a'
+ where
+ a' = toShortText a
+
tests :: [TestTree]
tests =
[ testCase "withTempFile works as expected" $
@@ -95,4 +115,9 @@ tests =
withTempDirRemovedTest
, testCase "rawSystemStdInOut reports text decoding errors" $
rawSystemStdInOutTextDecodingTest
+
+ , testProperty "ShortText Id" prop_ShortTextId
+ , testProperty "ShortText Ord" prop_ShortTextOrd
+ , testProperty "ShortText Monoid" prop_ShortTextMonoid
+ , testProperty "ShortText BinaryId" prop_ShortTextBinaryId
]

0 comments on commit 993d20a

Please sign in to comment.