Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Use a simpler restreaming state

  • Loading branch information...
commit 2798f88458c8a3d444178baa7498fc345ca10dcb 1 parent 73d7013
@jaspervdj jaspervdj authored
Showing with 91 additions and 101 deletions.
  1. +83 −98 Data/Text/Encoding/Fusion/Common.hs
  2. +8 −3 Data/Text/Fusion/Internal.hs
View
181 Data/Text/Encoding/Fusion/Common.hs
@@ -4,7 +4,8 @@
-- Module : Data.Text.Encoding.Fusion.Common
-- Copyright : (c) Tom Harper 2008-2009,
-- (c) Bryan O'Sullivan 2009,
--- (c) Duncan Coutts 2009
+-- (c) Duncan Coutts 2009,
+-- (c) Jasper Van der Jeugt 2011
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
@@ -29,7 +30,7 @@ module Data.Text.Encoding.Fusion.Common
import Data.Bits ((.&.))
import Data.Text.Fusion (Step(..), Stream(..))
-import Data.Text.Fusion.Internal (M(..), S(..))
+import Data.Text.Fusion.Internal (RS(..))
import Data.Text.UnsafeChar (ord)
import Data.Text.UnsafeShift (shiftR)
import Data.Word (Word8)
@@ -37,126 +38,110 @@ import qualified Data.Text.Encoding.Utf8 as U8
-- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
restreamUtf8 :: Stream Char -> Stream Word8
-restreamUtf8 (Stream next0 s0 len) =
- Stream next (S s0 N N N) (len*2)
- where
- {-# INLINE next #-}
- next (S s N N N) = case next0 s of
- Done -> Done
- Skip s' -> Skip (S s' N N N)
- Yield x xs
- | n <= 0x7F -> Yield c (S xs N N N)
- | n <= 0x07FF -> Yield a2 (S xs (J b2) N N)
- | n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N)
- | otherwise -> Yield a4 (S xs (J b4) (J c4) (J d4))
- where
- n = ord x
- c = fromIntegral n
- (a2,b2) = U8.ord2 x
- (a3,b3,c3) = U8.ord3 x
- (a4,b4,c4,d4) = U8.ord4 x
- next (S s (J x2) N N) = Yield x2 (S s N N N)
- next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
- next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
- next _ = internalError "restreamUtf8"
+restreamUtf8 (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
+ where
+ next (RS0 s) = case next0 s of
+ Done -> Done
+ Skip s' -> Skip (RS0 s')
+ Yield x s'
+ | n <= 0x7F -> Yield c (RS0 s')
+ | n <= 0x07FF -> Yield a2 (RS1 s' b2)
+ | n <= 0xFFFF -> Yield a3 (RS2 s' b3 c3)
+ | otherwise -> Yield a4 (RS3 s' b4 c4 d4)
+ where
+ n = ord x
+ c = fromIntegral n
+ (a2,b2) = U8.ord2 x
+ (a3,b3,c3) = U8.ord3 x
+ (a4,b4,c4,d4) = U8.ord4 x
+ next (RS1 s x2) = Yield x2 (RS0 s)
+ next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
+ next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
+ {-# INLINE next #-}
{-# INLINE restreamUtf8 #-}
restreamUtf16BE :: Stream Char -> Stream Word8
-restreamUtf16BE (Stream next0 s0 len) =
- Stream next (S s0 N N N) (len*2)
- where
- {-# INLINE next #-}
- next (S s N N N) = case next0 s of
- Done -> Done
- Skip s' -> Skip (S s' N N N)
- Yield x xs
- | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
- S xs (J $ fromIntegral n) N N
- | otherwise -> Yield c1 $
- S xs (J c2) (J c3) (J c4)
- where
- n = ord x
- n1 = n - 0x10000
- c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
- c2 = fromIntegral (n1 `shiftR` 10)
- n2 = n1 .&. 0x3FF
- c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
- c4 = fromIntegral n2
- next (S s (J x2) N N) = Yield x2 (S s N N N)
- next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
- next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
- next _ = internalError "restreamUtf16BE"
+restreamUtf16BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
+ where
+ next (RS0 s) = case next0 s of
+ Done -> Done
+ Skip s' -> Skip (RS0 s')
+ Yield x s'
+ | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
+ RS1 s' (fromIntegral n)
+ | otherwise -> Yield c1 $ RS3 s' c2 c3 c4
+ where
+ n = ord x
+ n1 = n - 0x10000
+ c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
+ c2 = fromIntegral (n1 `shiftR` 10)
+ n2 = n1 .&. 0x3FF
+ c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
+ c4 = fromIntegral n2
+ next (RS1 s x2) = Yield x2 (RS0 s)
+ next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
+ next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
+ {-# INLINE next #-}
{-# INLINE restreamUtf16BE #-}
restreamUtf16LE :: Stream Char -> Stream Word8
-restreamUtf16LE (Stream next0 s0 len) =
- Stream next (S s0 N N N) (len*2)
- where
- {-# INLINE next #-}
- next (S s N N N) = case next0 s of
- Done -> Done
- Skip s' -> Skip (S s' N N N)
- Yield x xs
- | n < 0x10000 -> Yield (fromIntegral n) $
- S xs (J (fromIntegral $ shiftR n 8)) N N
- | otherwise -> Yield c1 $
- S xs (J c2) (J c3) (J c4)
- where
- n = ord x
- n1 = n - 0x10000
- c2 = fromIntegral (shiftR n1 18 + 0xD8)
- c1 = fromIntegral (shiftR n1 10)
- n2 = n1 .&. 0x3FF
- c4 = fromIntegral (shiftR n2 8 + 0xDC)
- c3 = fromIntegral n2
- next (S s (J x2) N N) = Yield x2 (S s N N N)
- next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
- next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
- next _ = internalError "restreamUtf16LE"
+restreamUtf16LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
+ where
+ next (RS0 s) = case next0 s of
+ Done -> Done
+ Skip s' -> Skip (RS0 s')
+ Yield x s'
+ | n < 0x10000 -> Yield (fromIntegral n) $
+ RS1 s' (fromIntegral $ shiftR n 8)
+ | otherwise -> Yield c1 $ RS3 s' c2 c3 c4
+ where
+ n = ord x
+ n1 = n - 0x10000
+ c2 = fromIntegral (shiftR n1 18 + 0xD8)
+ c1 = fromIntegral (shiftR n1 10)
+ n2 = n1 .&. 0x3FF
+ c4 = fromIntegral (shiftR n2 8 + 0xDC)
+ c3 = fromIntegral n2
+ next (RS1 s x2) = Yield x2 (RS0 s)
+ next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
+ next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
+ {-# INLINE next #-}
{-# INLINE restreamUtf16LE #-}
restreamUtf32BE :: Stream Char -> Stream Word8
-restreamUtf32BE (Stream next0 s0 len) =
- Stream next (S s0 N N N) (len*2)
- where
- {-# INLINE next #-}
- next (S s N N N) = case next0 s of
+restreamUtf32BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
+ where
+ next (RS0 s) = case next0 s of
Done -> Done
- Skip s' -> Skip (S s' N N N)
- Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
+ Skip s' -> Skip (RS0 s')
+ Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
where
n = ord x
c1 = fromIntegral $ shiftR n 24
c2 = fromIntegral $ shiftR n 16
c3 = fromIntegral $ shiftR n 8
c4 = fromIntegral n
- next (S s (J x2) N N) = Yield x2 (S s N N N)
- next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
- next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
- next _ = internalError "restreamUtf32BE"
+ next (RS1 s x2) = Yield x2 (RS0 s)
+ next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
+ next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
+ {-# INLINE next #-}
{-# INLINE restreamUtf32BE #-}
restreamUtf32LE :: Stream Char -> Stream Word8
-restreamUtf32LE (Stream next0 s0 len) =
- Stream next (S s0 N N N) (len*2)
- where
- {-# INLINE next #-}
- next (S s N N N) = case next0 s of
+restreamUtf32LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2)
+ where
+ next (RS0 s) = case next0 s of
Done -> Done
- Skip s' -> Skip (S s' N N N)
- Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
+ Skip s' -> Skip (RS0 s')
+ Yield x s' -> Yield c1 (RS3 s' c2 c3 c4)
where
n = ord x
c4 = fromIntegral $ shiftR n 24
c3 = fromIntegral $ shiftR n 16
c2 = fromIntegral $ shiftR n 8
c1 = fromIntegral n
- next (S s (J x2) N N) = Yield x2 (S s N N N)
- next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
- next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
- next _ = internalError "restreamUtf32LE"
+ next (RS1 s x2) = Yield x2 (RS0 s)
+ next (RS2 s x2 x3) = Yield x2 (RS1 s x3)
+ next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4)
+ {-# INLINE next #-}
{-# INLINE restreamUtf32LE #-}
-
-internalError :: String -> a
-internalError func =
- error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error"
View
11 Data/Text/Fusion/Internal.hs
@@ -3,7 +3,8 @@
-- Module : Data.Text.Fusion.Internal
-- Copyright : (c) Tom Harper 2008-2009,
-- (c) Bryan O'Sullivan 2009,
--- (c) Duncan Coutts 2009
+-- (c) Duncan Coutts 2009,
+-- (c) Jasper Van der Jeugt 2011
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
@@ -19,7 +20,7 @@ module Data.Text.Fusion.Internal
, M(..)
, M8
, PairS(..)
- , S(..)
+ , RS(..)
, Step(..)
, Stream(..)
, Switch(..)
@@ -39,7 +40,11 @@ data M a = N
type M8 = M Word8
-- Restreaming state.
-data S s = S !s !M8 !M8 !M8
+data RS s
+ = RS0 !s
+ | RS1 !s {-# UNPACK #-} !Word8
+ | RS2 !s {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
+ | RS3 !s {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8
infixl 2 :*:
data PairS a b = !a :*: !b
Please sign in to comment.
Something went wrong with that request. Please try again.