Skip to content
This repository
Browse code

Improve ASCII encoding performance in a safer way.

  • Loading branch information...
Commit a31c0f7df8fea235633a2eb82fb8d89b85a2fd47 (1 parent: 4e9d932)
Bryan O'Sullivan authored December 23, 2011

Showing 1 changed file with 13 additions and 3 deletions.

  1. Data/Text/Encoding.hs (16 lines changed: 13 additions, 3 deletions)
@@ -153,8 +153,9 @@ encodeUtf8 (Text arr off len) = unsafePerformIO $ do
153 153
    where
154 154
     loop n1 m1 ptr = go n1 m1
155 155
      where
  156
+      offLen = off + len
156 157
       go !n !m
157  
-        | n == off+len = return (PS fp 0 m)
  158
+        | n == offLen = return (PS fp 0 m)
158 159
         | otherwise = do
159 160
             let poke8 k v = poke (ptr `plusPtr` k) (fromIntegral v :: Word8)
160 161
                 ensure k act
@@ -168,8 +169,17 @@ encodeUtf8 (Text arr off len) = unsafePerformIO $ do
168 169
                 {-# INLINE ensure #-}
169 170
             case A.unsafeIndex arr n of
170 171
              w| w <= 0x7F  -> ensure 1 $ do
171  
-                  poke8 m w
172  
-                  go (n+1) (m+1)
  172
+                  poke (ptr `plusPtr` m) (fromIntegral w :: Word8)
  173
+                  -- A single ASCII octet is likely to start a run of
  174
+                  -- them.  We see better performance when we
  175
+                  -- special-case this assumption.
  176
+                  let ascii !t !u
  177
+                        | t == offLen || u == size || v >= 0x80 = go t u
  178
+                        | otherwise = do
  179
+                            poke (ptr `plusPtr` u) (fromIntegral v :: Word8)
  180
+                            ascii (t+1) (u+1)
  181
+                        where v = A.unsafeIndex arr t
  182
+                  ascii (n+1) (m+1)
173 183
               | w <= 0x7FF -> ensure 2 $ do
174 184
                   poke8 m     $ (w `shiftR` 6) + 0xC0
175 185
                   poke8 (m+1) $ (w .&. 0x3f) + 0x80

0 notes on commit a31c0f7

Please sign in to comment.
Something went wrong with that request. Please try again.