Permalink
Browse files

Split encoding support out into new modules

--HG--
extra : convert_revision : 1132eb117b6ebdfe42a897ff200a34c914f415d3
  • Loading branch information...
1 parent 9954482 commit a70e3a0f6243ed88975072f5d6532c1565f30274 @bos committed Jan 27, 2009
Showing with 440 additions and 288 deletions.
  1. +3 −27 Data/Text.hs
  2. +99 −0 Data/Text/Encoding.hs
  3. +330 −0 Data/Text/Encoding/Fusion.hs
  4. +4 −259 Data/Text/Fusion.hs
  5. +2 −2 tests/Bench.hs
  6. +2 −0 text.cabal
View
@@ -29,15 +29,12 @@ module Data.Text
-- * Types
Text
- , Encoding(..)
-- * Creation and elimination
, pack
, unpack
, singleton
, empty
- , encode
- , decode
-- * Basic interface
, cons
@@ -98,31 +95,24 @@ module Data.Text
-- * Zipping and unzipping
, zipWith
-
- -- * I/O
- , readFile
) where
import Prelude (Char, Bool, Int, Maybe, String,
Eq, (==), (++), error,
Show, showsPrec,
Read, readsPrec,
- (&&), (||), (+), (-), (<), (>), (<=), (>=), (.), (>>=),
- return, otherwise,
- IO, FilePath)
+ (&&), (||), (+), (-), (<), (>), (<=), (>=), (.),
+ return, otherwise)
import Data.Char (isSpace)
import Control.Monad.ST (ST)
import qualified Data.Text.Array as A
-import qualified Data.ByteString as B
-import Data.ByteString (ByteString)
import qualified Data.List as L
import Data.Monoid (Monoid(..))
import Data.Word (Word16)
import Data.String (IsString(..))
import qualified Data.Text.Fusion as S
-import Data.Text.Fusion (Stream(..), Step(..), Encoding(..),
- stream, unstream, stream_bs, unstream_bs, restream)
+import Data.Text.Fusion (Stream(..), Step(..), stream, unstream)
import Data.Text.Internal (Text(..), empty)
import qualified Prelude as P
import Data.Text.UnsafeChar (unsafeChr)
@@ -190,14 +180,6 @@ singleton c = unstream (Stream next (c:[]) 1)
next [] = Done
{-# INLINE [1] singleton #-}
-decode :: Encoding -> ByteString -> Text
-decode enc bs = unstream (stream_bs enc bs)
-{-# INLINE decode #-}
-
-encode :: Encoding -> Text -> ByteString
-encode enc txt = unstream_bs (restream enc (stream txt))
-{-# INLINE encode #-}
-
-- -----------------------------------------------------------------------------
-- * Basic functions
@@ -561,12 +543,6 @@ elemIndex c t = S.elemIndex c (stream t)
-- Streams both inputs, combines the two streams with 'S.zipWith' using
-- the supplied function, and unstreams the result back into a 'Text'.
zipWith :: (Char -> Char -> Char) -> Text -> Text -> Text
zipWith combine lhs rhs = unstream $ S.zipWith combine (stream lhs) (stream rhs)
--- File I/O
-
-readFile :: Encoding -> FilePath -> IO Text
-readFile enc f = B.readFile f >>= return . unstream . stream_bs enc
-{-# INLINE [1] readFile #-}
-
words :: Text -> [Text]
words (Text arr off len) = loop0 off off
where
View
@@ -0,0 +1,99 @@
+-- |
+-- Module : Data.Text.Encoding
+-- Copyright : (c) Tom Harper 2008-2009,
+-- (c) Bryan O'Sullivan 2009,
+-- (c) Duncan Coutts 2009
+--
+-- License : BSD-style
+-- Maintainer : rtharper@aftereternity.co.uk, bos@serpentine.com,
+-- duncan@haskell.org
+-- Stability : experimental
+-- Portability : portable
+--
+-- Functions for converting 'Text' values to and from 'ByteString',
+-- using several common encodings.
+
+module Data.Text.Encoding
+ (
+ -- * Decoding ByteStrings to Text
+ decodeASCII
+ , decodeUtf8
+ , decodeUtf16LE
+ , decodeUtf16BE
+ , decodeUtf32LE
+ , decodeUtf32BE
+
+ -- * Encoding Text to ByteStrings
+ , encodeASCII
+ , encodeUtf8
+ , encodeUtf16LE
+ , encodeUtf16BE
+ , encodeUtf32LE
+ , encodeUtf32BE
+ ) where
+
+import Data.ByteString (ByteString)
+import qualified Data.Text.Fusion as F
+import qualified Data.Text.Encoding.Fusion as E
+import Data.Text.Internal (Text)
+
+-- | Turn a 'ByteString' holding 7-bit ASCII encoded text into 'Text'.
+-- The bytes are streamed with 'E.streamASCII' and then unstreamed.
+decodeASCII :: ByteString -> Text
+decodeASCII bytes = F.unstream $ E.streamASCII bytes
+{-# INLINE decodeASCII #-}
+
+-- | Turn a 'ByteString' holding UTF-8 encoded text into 'Text'.
+-- The bytes are streamed with 'E.streamUtf8' and then unstreamed.
+decodeUtf8 :: ByteString -> Text
+decodeUtf8 bytes = F.unstream $ E.streamUtf8 bytes
+{-# INLINE decodeUtf8 #-}
+
+-- | Produce a 7-bit ASCII 'ByteString' from a 'Text' value.
+--
+-- /Note/: any non-ASCII characters present in the input 'Text' will
+-- be /truncated/.
+encodeASCII :: Text -> ByteString
+encodeASCII t = E.unstream $ E.restreamASCII $ F.stream t
+{-# INLINE encodeASCII #-}
+
+-- | Produce a UTF-8 encoded 'ByteString' from a 'Text' value.
+encodeUtf8 :: Text -> ByteString
+encodeUtf8 t = E.unstream $ E.restreamUtf8 $ F.stream t
+{-# INLINE encodeUtf8 #-}
+
+-- | Turn a 'ByteString' holding little endian UTF-16 encoded text
+-- into 'Text'.
+decodeUtf16LE :: ByteString -> Text
+decodeUtf16LE bytes = F.unstream $ E.streamUtf16LE bytes
+{-# INLINE decodeUtf16LE #-}
+
+-- | Turn a 'ByteString' holding big endian UTF-16 encoded text
+-- into 'Text'.
+decodeUtf16BE :: ByteString -> Text
+decodeUtf16BE bytes = F.unstream $ E.streamUtf16BE bytes
+{-# INLINE decodeUtf16BE #-}
+
+-- | Produce a little endian UTF-16 encoded 'ByteString' from a
+-- 'Text' value.
+encodeUtf16LE :: Text -> ByteString
+encodeUtf16LE t = E.unstream $ E.restreamUtf16LE $ F.stream t
+{-# INLINE encodeUtf16LE #-}
+
+-- | Produce a big endian UTF-16 encoded 'ByteString' from a
+-- 'Text' value.
+encodeUtf16BE :: Text -> ByteString
+encodeUtf16BE t = E.unstream $ E.restreamUtf16BE $ F.stream t
+{-# INLINE encodeUtf16BE #-}
+
+-- | Turn a 'ByteString' holding little endian UTF-32 encoded text
+-- into 'Text'.
+decodeUtf32LE :: ByteString -> Text
+decodeUtf32LE bytes = F.unstream $ E.streamUtf32LE bytes
+{-# INLINE decodeUtf32LE #-}
+
+-- | Decode text from big endian UTF-32 encoding.
+--
+-- The input 'ByteString' is streamed with the big endian UTF-32
+-- stream function and then unstreamed into a 'Text'.
+decodeUtf32BE :: ByteString -> Text
+-- Must use the BE stream here: using 'E.streamUtf32LE' would read each
+-- code unit with the wrong byte order and make this function a
+-- duplicate of 'decodeUtf32LE'.
+decodeUtf32BE bs = F.unstream (E.streamUtf32BE bs)
+{-# INLINE decodeUtf32BE #-}
+
+-- | Produce a little endian UTF-32 encoded 'ByteString' from a
+-- 'Text' value.
+encodeUtf32LE :: Text -> ByteString
+encodeUtf32LE t = E.unstream $ E.restreamUtf32LE $ F.stream t
+{-# INLINE encodeUtf32LE #-}
+
+-- | Produce a big endian UTF-32 encoded 'ByteString' from a
+-- 'Text' value.
+encodeUtf32BE :: Text -> ByteString
+encodeUtf32BE t = E.unstream $ E.restreamUtf32BE $ F.stream t
+{-# INLINE encodeUtf32BE #-}
Oops, something went wrong.

0 comments on commit a70e3a0

Please sign in to comment.