Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

163 lines (153 sloc) 5.983 kB
{-# LANGUAGE BangPatterns #-}
-- |
-- Module : Data.Text.Encoding.Fusion.Common
-- Copyright : (c) Tom Harper 2008-2009,
-- (c) Bryan O'Sullivan 2009,
-- (c) Duncan Coutts 2009
--
-- License : BSD-style
-- Maintainer : bos@serpentine.com, rtomharper@googlemail.com,
-- duncan@haskell.org
-- Stability : experimental
-- Portability : portable
--
-- Fusible 'Stream'-oriented functions for converting between 'Text'
-- and several common encodings.
module Data.Text.Encoding.Fusion.Common
(
-- * Restreaming
-- Restreaming is the act of converting from one 'Stream'
-- representation to another.
restreamUtf8
, restreamUtf16LE
, restreamUtf16BE
, restreamUtf32LE
, restreamUtf32BE
) where
import Data.Bits ((.&.))
import Data.Char (ord)
import Data.Text.Fusion (Step(..), Stream(..))
import Data.Text.Fusion.Internal (M(..), S(..))
import Data.Text.UnsafeShift (shiftR)
import Data.Word (Word8)
import qualified Data.Text.Encoding.Utf8 as U8
-- | /O(n)/ Convert a Stream Char into a UTF-8 encoded Stream Word8.
restreamUtf8 :: Stream Char -> Stream Word8
restreamUtf8 (Stream next0 s0 len) =
Stream next (S s0 N N N) (len*2)
where
{-# INLINE next #-}
next (S s N N N) = case next0 s of
Done -> Done
Skip s' -> Skip (S s' N N N)
Yield x xs
| n <= 0x7F -> Yield c (S xs N N N)
| n <= 0x07FF -> Yield a2 (S xs (J b2) N N)
| n <= 0xFFFF -> Yield a3 (S xs (J b3) (J c3) N)
| otherwise -> Yield a4 (S xs (J b4) (J c4) (J d4))
where
n = ord x
c = fromIntegral n
(a2,b2) = U8.ord2 x
(a3,b3,c3) = U8.ord3 x
(a4,b4,c4,d4) = U8.ord4 x
next (S s (J x2) N N) = Yield x2 (S s N N N)
next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
next _ = internalError "restreamUtf8"
{-# INLINE restreamUtf8 #-}
restreamUtf16BE :: Stream Char -> Stream Word8
restreamUtf16BE (Stream next0 s0 len) =
Stream next (S s0 N N N) (len*2)
where
{-# INLINE next #-}
next (S s N N N) = case next0 s of
Done -> Done
Skip s' -> Skip (S s' N N N)
Yield x xs
| n < 0x10000 -> Yield (fromIntegral $ n `shiftR` 8) $
S xs (J $ fromIntegral n) N N
| otherwise -> Yield c1 $
S xs (J c2) (J c3) (J c4)
where
n = ord x
n1 = n - 0x10000
c1 = fromIntegral (n1 `shiftR` 18 + 0xD8)
c2 = fromIntegral (n1 `shiftR` 10)
n2 = n1 .&. 0x3FF
c3 = fromIntegral (n2 `shiftR` 8 + 0xDC)
c4 = fromIntegral n2
next (S s (J x2) N N) = Yield x2 (S s N N N)
next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
next _ = internalError "restreamUtf16BE"
{-# INLINE restreamUtf16BE #-}
restreamUtf16LE :: Stream Char -> Stream Word8
restreamUtf16LE (Stream next0 s0 len) =
Stream next (S s0 N N N) (len*2)
where
{-# INLINE next #-}
next (S s N N N) = case next0 s of
Done -> Done
Skip s' -> Skip (S s' N N N)
Yield x xs
| n < 0x10000 -> Yield (fromIntegral n) $
S xs (J (fromIntegral $ shiftR n 8)) N N
| otherwise -> Yield c1 $
S xs (J c2) (J c3) (J c4)
where
n = ord x
n1 = n - 0x10000
c2 = fromIntegral (shiftR n1 18 + 0xD8)
c1 = fromIntegral (shiftR n1 10)
n2 = n1 .&. 0x3FF
c4 = fromIntegral (shiftR n2 8 + 0xDC)
c3 = fromIntegral n2
next (S s (J x2) N N) = Yield x2 (S s N N N)
next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
next _ = internalError "restreamUtf16LE"
{-# INLINE restreamUtf16LE #-}
restreamUtf32BE :: Stream Char -> Stream Word8
restreamUtf32BE (Stream next0 s0 len) =
Stream next (S s0 N N N) (len*2)
where
{-# INLINE next #-}
next (S s N N N) = case next0 s of
Done -> Done
Skip s' -> Skip (S s' N N N)
Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
where
n = ord x
c1 = fromIntegral $ shiftR n 24
c2 = fromIntegral $ shiftR n 16
c3 = fromIntegral $ shiftR n 8
c4 = fromIntegral n
next (S s (J x2) N N) = Yield x2 (S s N N N)
next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
next _ = internalError "restreamUtf32BE"
{-# INLINE restreamUtf32BE #-}
restreamUtf32LE :: Stream Char -> Stream Word8
restreamUtf32LE (Stream next0 s0 len) =
Stream next (S s0 N N N) (len*2)
where
{-# INLINE next #-}
next (S s N N N) = case next0 s of
Done -> Done
Skip s' -> Skip (S s' N N N)
Yield x xs -> Yield c1 (S xs (J c2) (J c3) (J c4))
where
n = ord x
c4 = fromIntegral $ shiftR n 24
c3 = fromIntegral $ shiftR n 16
c2 = fromIntegral $ shiftR n 8
c1 = fromIntegral n
next (S s (J x2) N N) = Yield x2 (S s N N N)
next (S s (J x2) x3 N) = Yield x2 (S s x3 N N)
next (S s (J x2) x3 x4) = Yield x2 (S s x3 x4 N)
next _ = internalError "restreamUtf32LE"
{-# INLINE restreamUtf32LE #-}
internalError :: String -> a
internalError func =
error $ "Data.Text.Encoding.Fusion.Common." ++ func ++ ": internal error"
Jump to Line
Something went wrong with that request. Please try again.