Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Reduce the amount of code generated for string literals

Previously, every Text literal generated a big wad of inefficient
code:

    {-# LANGUAGE OverloadedStrings #-}
    foo :: Text
    foo = "foo"

This would first convert to a String, then to a Text.  To make
matters worse, the code for conversion from String to Text was
inlined at every site where a string literal occurred (expected,
but undesired, behaviour).

In this change, we introduce a direct conversion from Addr# to each
of the Text types, and we ensure that uses of these never result in
excessive code generation.
  • Loading branch information...
commit 2b2cb084c4689c06f1a7851ff8eb1e412eb02c1b 1 parent 0d6d979
@bos authored
Showing with 69 additions and 6 deletions.
  1. +17 −2 Data/Text.hs
  2. +35 −2 Data/Text/Fusion/Common.hs
  3. +17 −2 Data/Text/Lazy.hs
View
19 Data/Text.hs
@@ -1,9 +1,9 @@
-{-# LANGUAGE BangPatterns, CPP, Rank2Types, UnboxedTuples #-}
+{-# LANGUAGE BangPatterns, CPP, MagicHash, Rank2Types, UnboxedTuples #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
-- |
-- Module : Data.Text
--- Copyright : (c) 2009, 2010, 2011 Bryan O'Sullivan,
+-- Copyright : (c) 2009, 2010, 2011, 2012 Bryan O'Sullivan,
-- (c) 2009 Duncan Coutts,
-- (c) 2008, 2009 Tom Harper
--
@@ -222,6 +222,8 @@ import Data.ByteString (ByteString)
import qualified Data.Text.Lazy as L
import Data.Int (Int64)
#endif
+import qualified GHC.CString as GHC
+import GHC.Prim (Addr#)
-- $strict
--
@@ -372,6 +374,19 @@ unpack :: Text -> String
unpack = S.unstreamList . stream
{-# INLINE [1] unpack #-}
+-- | /O(n)/ Convert a literal string into a Text.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+ unstream (S.streamList (L.map safe (GHC.unpackCString# a)))
+ = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+ unstream (S.streamList (L.map safe (GHC.unpackCStringUtf8# a)))
+ = unpackCString# a #-}
+
-- | /O(1)/ Convert a character into a Text. Subject to fusion.
-- Performs replacement on invalid scalar values.
singleton :: Char -> Text
View
37 Data/Text/Fusion/Common.hs
@@ -1,7 +1,7 @@
-{-# LANGUAGE BangPatterns, Rank2Types #-}
+{-# LANGUAGE BangPatterns, MagicHash, Rank2Types #-}
-- |
-- Module : Data.Text.Fusion.Common
--- Copyright : (c) Bryan O'Sullivan 2009
+-- Copyright : (c) Bryan O'Sullivan 2009, 2012
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
@@ -17,6 +17,7 @@ module Data.Text.Fusion.Common
singleton
, streamList
, unstreamList
+ , streamCString#
-- * Basic interface
, cons
@@ -104,10 +105,13 @@ import Prelude (Bool(..), Char, Eq(..), Int, Integral, Maybe(..),
(&&), fromIntegral, otherwise)
import qualified Data.List as L
import qualified Prelude as P
+import Data.Bits (shiftL)
import Data.Int (Int64)
import Data.Text.Fusion.Internal
import Data.Text.Fusion.CaseMapping (foldMapping, lowerMapping, upperMapping)
import Data.Text.Fusion.Size
+import GHC.Prim (Addr#, chr#, indexCharOffAddr#, ord#)
+import GHC.Types (Char(..), Int(..))
singleton :: Char -> Stream Char
singleton c = Stream next False 1
@@ -131,6 +135,35 @@ unstreamList (Stream next s0 _len) = unfold s0
{-# RULES "STREAM streamList/unstreamList fusion" forall s. streamList (unstreamList s) = s #-}
+-- | Stream the UTF-8-like packed encoding used by GHC to represent
+-- constant strings in generated code.
+--
+-- This encoding uses the byte sequence "\xc0\x80" to represent NUL,
+-- and the string is NUL-terminated.
+streamCString# :: Addr# -> Stream Char
+streamCString# addr = Stream step 0 unknownSize
+ where
+ step !i
+ | b == 0 = Done
+ | b <= 0x7f = Yield (C# b#) (i+1)
+ | b <= 0xdf = let !c = chr $ ((b-0xc0) `shiftL` 6) + next 1
+ in Yield c (i+2)
+ | b <= 0xef = let !c = chr $ ((b-0xe0) `shiftL` 12) +
+ (next 1 `shiftL` 6) +
+ next 2
+ in Yield c (i+3)
+ | otherwise = let !c = chr $ ((b-0xf0) `shiftL` 18) +
+ (next 1 `shiftL` 12) +
+ (next 2 `shiftL` 6) +
+ next 3
+ in Yield c (i+4)
+ where b = I# (ord# b#)
+ next n = I# (ord# (at# (i+n))) - 0x80
+ b# = at# i
+ at# (I# i#) = indexCharOffAddr# addr i#
+ chr (I# i#) = C# (chr# i#)
+{-# INLINE [0] streamCString# #-}
+
-- ----------------------------------------------------------------------------
-- * Basic stream functions
View
19 Data/Text/Lazy.hs
@@ -1,8 +1,8 @@
{-# OPTIONS_GHC -fno-warn-orphans #-}
-{-# LANGUAGE BangPatterns, CPP #-}
+{-# LANGUAGE BangPatterns, MagicHash, CPP #-}
-- |
-- Module : Data.Text.Lazy
--- Copyright : (c) 2009, 2010 Bryan O'Sullivan
+-- Copyright : (c) 2009, 2010, 2012 Bryan O'Sullivan
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
@@ -217,6 +217,8 @@ import Data.Text.Lazy.Internal (Text(..), chunk, empty, foldlChunks, foldrChunks
import Data.Text.Internal (firstf, safe, textP)
import qualified Data.Text.Util as U
import Data.Text.Lazy.Search (indices)
+import qualified GHC.CString as GHC
+import GHC.Prim (Addr#)
-- $fusion
--
@@ -351,6 +353,19 @@ unpack :: Text -> String
unpack t = S.unstreamList (stream t)
{-# INLINE [1] unpack #-}
+-- | /O(n)/ Convert a literal string into a Text.
+unpackCString# :: Addr# -> Text
+unpackCString# addr# = unstream (S.streamCString# addr#)
+{-# NOINLINE unpackCString# #-}
+
+{-# RULES "TEXT literal" forall a.
+ unstream (S.streamList (L.map safe (GHC.unpackCString# a)))
+ = unpackCString# a #-}
+
+{-# RULES "TEXT literal UTF8" forall a.
+ unstream (S.streamList (L.map safe (GHC.unpackCStringUtf8# a)))
+ = unpackCString# a #-}
+
-- | /O(1)/ Convert a character into a Text. Subject to fusion.
-- Performs replacement on invalid scalar values.
singleton :: Char -> Text
Please sign in to comment.
Something went wrong with that request. Please try again.