diff --git a/Data/Text/Encoding/Fusion/Common.hs b/Data/Text/Encoding/Fusion/Common.hs index eedb59df..06807a7f 100644 --- a/Data/Text/Encoding/Fusion/Common.hs +++ b/Data/Text/Encoding/Fusion/Common.hs @@ -4,7 +4,8 @@ -- Module : Data.Text.Encoding.Fusion.Common -- Copyright : (c) Tom Harper 2008-2009, -- (c) Bryan O'Sullivan 2009, --- (c) Duncan Coutts 2009 +-- (c) Duncan Coutts 2009, +-- (c) Jasper Van der Jeugt 2011 -- -- License : BSD-style -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, @@ -29,7 +30,7 @@ module Data.Text.Encoding.Fusion.Common import Data.Bits ((.&.)) import Data.Text.Fusion (Step(..), Stream(..)) -import Data.Text.Fusion.Internal (M(..), S(..)) +import Data.Text.Fusion.Internal (RS(..)) import Data.Text.UnsafeChar (ord) import Data.Text.UnsafeShift (shiftR) import Data.Word (Word8) @@ -37,126 +38,110 @@ import qualified Data.Text.Encoding.Utf8 as U8 -- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8. restreamUtf8 :: Stream Char -> Stream Word8 -restreamUtf8 (Stream next0 s0 len) = - Stream next (S s0 N N N) (len*2) - where - {-# INLINE next #-} - next (S s N N N) = case next0 s of - Done -> Done - Skip s' -> Skip (S s' N N N) - Yield x xs - | n <= 0x7F -> Yield c (S xs N N N) - | n <= 0x07FF -> Yield a2 (S xs (J b2) N N) - | n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N) - | otherwise -> Yield a4 (S xs (J b4) (J c4) (J d4)) - where - n = ord x - c = fromIntegral n - (a2,b2) = U8.ord2 x - (a3,b3,c3) = U8.ord3 x - (a4,b4,c4,d4) = U8.ord4 x - next (S s (J x2) N N) = Yield x2 (S s N N N) - next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) - next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) - next _ = internalError "restreamUtf8" +restreamUtf8 (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2) + where + next (RS0 s) = case next0 s of + Done -> Done + Skip s' -> Skip (RS0 s') + Yield x s' + | n <= 0x7F -> Yield c (RS0 s') + | n <= 0x07FF -> Yield a2 (RS1 s' b2) + | n <= 0xFFFF -> Yield a3 (RS2 s' b3 c3) + | otherwise -> Yield a4 (RS3 s' b4 c4 d4) + where + n = ord x + c = fromIntegral n + (a2,b2) = U8.ord2 x + (a3,b3,c3) = U8.ord3 x + (a4,b4,c4,d4) = U8.ord4 x + next (RS1 s x2) = Yield x2 (RS0 s) + next (RS2 s x2 x3) = Yield x2 (RS1 s x3) + next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4) + {-# INLINE next #-} {-# INLINE restreamUtf8 #-} restreamUtf16BE :: Stream Char -> Stream Word8 -restreamUtf16BE (Stream next0 s0 len) = - Stream next (S s0 N N N) (len*2) - where - {-# INLINE next #-} - next (S s N N N) = case next0 s of - Done -> Done - Skip s' -> Skip (S s' N N N) - Yield x xs - | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $ - S xs (J $ fromIntegral n) N N - | otherwise -> Yield c1 $ - S xs (J c2) (J c3) (J c4) - where - n = ord x - n1 = n - 0x10000 - c1 = fromIntegral (n1 `shiftR` 18 + 0xD8) - c2 = fromIntegral (n1 `shiftR` 10) - n2 = n1 .&. 0x3FF - c3 = fromIntegral (n2 `shiftR` 8 + 0xDC) - c4 = fromIntegral n2 - next (S s (J x2) N N) = Yield x2 (S s N N N) - next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) - next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) - next _ = internalError "restreamUtf16BE" +restreamUtf16BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2) + where + next (RS0 s) = case next0 s of + Done -> Done + Skip s' -> Skip (RS0 s') + Yield x s' + | n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $ + RS1 s' (fromIntegral n) + | otherwise -> Yield c1 $ RS3 s' c2 c3 c4 + where + n = ord x + n1 = n - 0x10000 + c1 = fromIntegral (n1 `shiftR` 18 + 0xD8) + c2 = fromIntegral (n1 `shiftR` 10) + n2 = n1 .&. 0x3FF + c3 = fromIntegral (n2 `shiftR` 8 + 0xDC) + c4 = fromIntegral n2 + next (RS1 s x2) = Yield x2 (RS0 s) + next (RS2 s x2 x3) = Yield x2 (RS1 s x3) + next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4) + {-# INLINE next #-} {-# INLINE restreamUtf16BE #-} restreamUtf16LE :: Stream Char -> Stream Word8 -restreamUtf16LE (Stream next0 s0 len) = - Stream next (S s0 N N N) (len*2) - where - {-# INLINE next #-} - next (S s N N N) = case next0 s of - Done -> Done - Skip s' -> Skip (S s' N N N) - Yield x xs - | n < 0x10000 -> Yield (fromIntegral n) $ - S xs (J (fromIntegral $ shiftR n 8)) N N - | otherwise -> Yield c1 $ - S xs (J c2) (J c3) (J c4) - where - n = ord x - n1 = n - 0x10000 - c2 = fromIntegral (shiftR n1 18 + 0xD8) - c1 = fromIntegral (shiftR n1 10) - n2 = n1 .&. 0x3FF - c4 = fromIntegral (shiftR n2 8 + 0xDC) - c3 = fromIntegral n2 - next (S s (J x2) N N) = Yield x2 (S s N N N) - next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) - next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) - next _ = internalError "restreamUtf16LE" +restreamUtf16LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2) + where + next (RS0 s) = case next0 s of + Done -> Done + Skip s' -> Skip (RS0 s') + Yield x s' + | n < 0x10000 -> Yield (fromIntegral n) $ + RS1 s' (fromIntegral $ shiftR n 8) + | otherwise -> Yield c1 $ RS3 s' c2 c3 c4 + where + n = ord x + n1 = n - 0x10000 + c2 = fromIntegral (shiftR n1 18 + 0xD8) + c1 = fromIntegral (shiftR n1 10) + n2 = n1 .&. 0x3FF + c4 = fromIntegral (shiftR n2 8 + 0xDC) + c3 = fromIntegral n2 + next (RS1 s x2) = Yield x2 (RS0 s) + next (RS2 s x2 x3) = Yield x2 (RS1 s x3) + next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4) + {-# INLINE next #-} {-# INLINE restreamUtf16LE #-} restreamUtf32BE :: Stream Char -> Stream Word8 -restreamUtf32BE (Stream next0 s0 len) = - Stream next (S s0 N N N) (len*2) - where - {-# INLINE next #-} - next (S s N N N) = case next0 s of +restreamUtf32BE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2) + where + next (RS0 s) = case next0 s of Done -> Done - Skip s' -> Skip (S s' N N N) - Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4)) + Skip s' -> Skip (RS0 s') + Yield x s' -> Yield c1 (RS3 s' c2 c3 c4) where n = ord x c1 = fromIntegral $ shiftR n 24 c2 = fromIntegral $ shiftR n 16 c3 = fromIntegral $ shiftR n 8 c4 = fromIntegral n - next (S s (J x2) N N) = Yield x2 (S s N N N) - next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) - next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) - next _ = internalError "restreamUtf32BE" + next (RS1 s x2) = Yield x2 (RS0 s) + next (RS2 s x2 x3) = Yield x2 (RS1 s x3) + next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4) + {-# INLINE next #-} {-# INLINE restreamUtf32BE #-} restreamUtf32LE :: Stream Char -> Stream Word8 -restreamUtf32LE (Stream next0 s0 len) = - Stream next (S s0 N N N) (len*2) - where - {-# INLINE next #-} - next (S s N N N) = case next0 s of +restreamUtf32LE (Stream next0 s0 len) = Stream next (RS0 s0) (len * 2) + where + next (RS0 s) = case next0 s of Done -> Done - Skip s' -> Skip (S s' N N N) - Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4)) + Skip s' -> Skip (RS0 s') + Yield x s' -> Yield c1 (RS3 s' c2 c3 c4) where n = ord x c4 = fromIntegral $ shiftR n 24 c3 = fromIntegral $ shiftR n 16 c2 = fromIntegral $ shiftR n 8 c1 = fromIntegral n - next (S s (J x2) N N) = Yield x2 (S s N N N) - next (S s (J x2) x3 N) = Yield x2 (S s x3 N N) - next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N) - next _ = internalError "restreamUtf32LE" + next (RS1 s x2) = Yield x2 (RS0 s) + next (RS2 s x2 x3) = Yield x2 (RS1 s x3) + next (RS3 s x2 x3 x4) = Yield x2 (RS2 s x3 x4) + {-# INLINE next #-} {-# INLINE restreamUtf32LE #-} - -internalError :: String -> a -internalError func = - error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error" diff --git a/Data/Text/Fusion/Internal.hs b/Data/Text/Fusion/Internal.hs index a5920273..e28d7a82 100644 --- a/Data/Text/Fusion/Internal.hs +++ b/Data/Text/Fusion/Internal.hs @@ -3,7 +3,8 @@ -- Module : Data.Text.Fusion.Internal -- Copyright : (c) Tom Harper 2008-2009, -- (c) Bryan O'Sullivan 2009, --- (c) Duncan Coutts 2009 +-- (c) Duncan Coutts 2009, +-- (c) Jasper Van der Jeugt 2011 -- -- License : BSD-style -- Maintainer : bos@serpentine.com, rtomharper@googlemail.com, @@ -19,7 +20,7 @@ module Data.Text.Fusion.Internal , M(..) , M8 , PairS(..) - , S(..) + , RS(..) , Step(..) , Stream(..) , Switch(..) @@ -39,7 +40,11 @@ data M a = N type M8 = M Word8 -- Restreaming state. -data S s = S !s !M8 !M8 !M8 +data RS s + = RS0 !s + | RS1 !s {-# UNPACK #-} !Word8 + | RS2 !s {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 + | RS3 !s {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 {-# UNPACK #-} !Word8 infixl 2 :*: data PairS a b = !a :*: !b