Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Many small documentation improvements.

  • Loading branch information...
commit f97e59a983936e1d8ae41a598c66ad4e84a84042 1 parent 8fa25cb
@bos authored
View
3  Data/Text.hs
@@ -575,7 +575,8 @@ replace s d = intercalate d . splitOn s
-- context-dependent operation. The case conversion functions in this
-- module are /not/ locale sensitive. Programs that require locale
-- sensitivity should use appropriate versions of the case mapping
--- functions from the @text-icu@ package.
+-- functions from the @text-icu@ package:
+-- <http://hackage.haskell.org/package/text-icu>
-- | /O(n)/ Convert a string to folded case. This function is mainly
-- useful for performing caseless (also known as case insensitive)
View
40 Data/Text/Encoding.hs
@@ -14,18 +14,20 @@
-- Functions for converting 'Text' values to and from 'ByteString',
-- using several standard encodings.
--
--- To make use of a much larger variety of encodings, use the @text-icu@
--- package.
+-- To gain access to a much larger variety of encodings, use the
+-- @text-icu@ package: <http://hackage.haskell.org/package/text-icu>
module Data.Text.Encoding
(
-- * Decoding ByteStrings to Text
+ -- $strict
decodeASCII
, decodeUtf8
, decodeUtf16LE
, decodeUtf16BE
, decodeUtf32LE
, decodeUtf32BE
+
-- ** Controllable error handling
, decodeUtf8With
, decodeUtf16LEWith
@@ -60,11 +62,25 @@ import qualified Data.Text.Encoding.Utf16 as U16
import qualified Data.Text.Encoding.Utf8 as U8
import qualified Data.Text.Fusion as F
+-- $strict
+--
+-- All of the single-parameter functions for decoding bytestrings
+-- encoded in one of the Unicode Transformation Formats (UTF) operate
+-- in a /strict/ mode: each will throw an exception if given invalid
+-- input.
+--
+-- Each function has a variant, whose name is suffixed with @With@,
+-- that gives greater control over the handling of decoding errors.
+-- For instance, 'decodeUtf8' will throw an exception, but
+-- 'decodeUtf8With' allows the programmer to determine what to do on a
+-- decoding error.
+
-- | Decode a 'ByteString' containing 7-bit ASCII encoded text.
decodeASCII :: ByteString -> Text
decodeASCII bs = F.unstream (E.streamASCII bs)
{-# INLINE decodeASCII #-}
+-- | Decode a 'ByteString' containing UTF-8 encoded text, handling
+-- invalid input with the supplied 'OnDecodeError' handler.
decodeUtf8With :: OnDecodeError -> ByteString -> Text
decodeUtf8With onErr bs = textP (fst a) 0 (snd a)
where
@@ -101,6 +117,10 @@ decodeUtf8With onErr bs = textP (fst a) 0 (snd a)
{-# INLINE[0] decodeUtf8With #-}
-- | Decode a 'ByteString' containing UTF-8 encoded text.
+--
+-- If the input contains any invalid UTF-8 data, an exception will be
+-- thrown. For more control over the handling of invalid data, use
+-- 'decodeUtf8With'.
decodeUtf8 :: ByteString -> Text
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE[0] decodeUtf8 #-}
@@ -156,6 +176,10 @@ decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
{-# INLINE decodeUtf16LEWith #-}
-- | Decode text from little endian UTF-16 encoding.
+--
+-- If the input contains any invalid little endian UTF-16 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf16LEWith'.
decodeUtf16LE :: ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}
@@ -166,6 +190,10 @@ decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
{-# INLINE decodeUtf16BEWith #-}
-- | Decode text from big endian UTF-16 encoding.
+--
+-- If the input contains any invalid big endian UTF-16 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf16BEWith'.
decodeUtf16BE :: ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}
@@ -186,6 +214,10 @@ decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
{-# INLINE decodeUtf32LEWith #-}
-- | Decode text from little endian UTF-32 encoding.
+--
+-- If the input contains any invalid little endian UTF-32 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf32LEWith'.
decodeUtf32LE :: ByteString -> Text
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}
@@ -196,6 +228,10 @@ decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
{-# INLINE decodeUtf32BEWith #-}
-- | Decode text from big endian UTF-32 encoding.
+--
+-- If the input contains any invalid big endian UTF-32 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf32BEWith'.
decodeUtf32BE :: ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}
View
22 Data/Text/IO.hs
@@ -9,9 +9,15 @@
-- Portability : GHC
--
-- Efficient locale-sensitive support for text I\/O.
+--
+-- Skip past the synopsis for some important notes on performance and
+-- portability across different versions of GHC.
module Data.Text.IO
(
+ -- * Performance
+ -- $performance
+
-- * Locale support
-- $locale
-- * File-at-a-time operations
@@ -60,6 +66,22 @@ import System.IO (hGetBuffering, hFileSize, hSetBuffering, hTell)
import System.IO.Error (isEOFError)
#endif
+-- $performance
+-- #performance#
+--
+-- The functions in this module obey the runtime system's locale,
+-- character set encoding, and line ending conversion settings.
+--
+-- If you know in advance that you will be working with data that has
+-- a specific encoding (e.g. UTF-8), and your application is highly
+-- performance sensitive, you may find that it is faster to perform
+-- I\/O with bytestrings and to encode and decode yourself than to use
+-- the functions in this module.
+--
+-- Whether this will hold depends on the version of GHC you are using,
+-- the platform you are working on, the data you are working with, and
+-- the encodings you are using, so be sure to test for yourself.
+
-- | The 'readFile' function reads a file and returns the contents of
-- the file as a string. The entire file is read strictly, as with
-- 'getContents'.
View
38 Data/Text/Lazy/Encoding.hs
@@ -12,12 +12,13 @@
-- Functions for converting lazy 'Text' values to and from lazy
-- 'ByteString', using several standard encodings.
--
--- To make use of a much larger variety of encodings, use the @text-icu@
--- package.
+-- To gain access to a much larger variety of encodings, use the
+-- @text-icu@ package: <http://hackage.haskell.org/package/text-icu>
module Data.Text.Lazy.Encoding
(
-- * Decoding ByteStrings to Text
+ -- $strict
decodeASCII
, decodeUtf8
, decodeUtf16LE
@@ -51,6 +52,19 @@ import qualified Data.Text.Encoding as TE
import qualified Data.Text.Lazy.Encoding.Fusion as E
import qualified Data.Text.Lazy.Fusion as F
+-- $strict
+--
+-- All of the single-parameter functions for decoding bytestrings
+-- encoded in one of the Unicode Transformation Formats (UTF) operate
+-- in a /strict/ mode: each will throw an exception if given invalid
+-- input.
+--
+-- Each function has a variant, whose name is suffixed with @With@,
+-- that gives greater control over the handling of decoding errors.
+-- For instance, 'decodeUtf8' will throw an exception, but
+-- 'decodeUtf8With' allows the programmer to determine what to do on a
+-- decoding error.
+
-- | Decode a 'ByteString' containing 7-bit ASCII encoded text.
decodeASCII :: B.ByteString -> Text
decodeASCII bs = foldr (chunk . TE.decodeASCII) empty (B.toChunks bs)
@@ -95,6 +109,10 @@ decodeUtf8With onErr bs0 = fast bs0
{-# INLINE[0] decodeUtf8With #-}
-- | Decode a 'ByteString' containing UTF-8 encoded text.
+--
+-- If the input contains any invalid UTF-8 data, an exception will be
+-- thrown. For more control over the handling of invalid data, use
+-- 'decodeUtf8With'.
decodeUtf8 :: B.ByteString -> Text
decodeUtf8 = decodeUtf8With strictDecode
{-# INLINE[0] decodeUtf8 #-}
@@ -113,6 +131,10 @@ decodeUtf16LEWith onErr bs = F.unstream (E.streamUtf16LE onErr bs)
{-# INLINE decodeUtf16LEWith #-}
-- | Decode text from little endian UTF-16 encoding.
+--
+-- If the input contains any invalid little endian UTF-16 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf16LEWith'.
decodeUtf16LE :: B.ByteString -> Text
decodeUtf16LE = decodeUtf16LEWith strictDecode
{-# INLINE decodeUtf16LE #-}
@@ -123,6 +145,10 @@ decodeUtf16BEWith onErr bs = F.unstream (E.streamUtf16BE onErr bs)
{-# INLINE decodeUtf16BEWith #-}
-- | Decode text from big endian UTF-16 encoding.
+--
+-- If the input contains any invalid big endian UTF-16 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf16BEWith'.
decodeUtf16BE :: B.ByteString -> Text
decodeUtf16BE = decodeUtf16BEWith strictDecode
{-# INLINE decodeUtf16BE #-}
@@ -143,6 +169,10 @@ decodeUtf32LEWith onErr bs = F.unstream (E.streamUtf32LE onErr bs)
{-# INLINE decodeUtf32LEWith #-}
-- | Decode text from little endian UTF-32 encoding.
+--
+-- If the input contains any invalid little endian UTF-32 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf32LEWith'.
decodeUtf32LE :: B.ByteString -> Text
decodeUtf32LE = decodeUtf32LEWith strictDecode
{-# INLINE decodeUtf32LE #-}
@@ -153,6 +183,10 @@ decodeUtf32BEWith onErr bs = F.unstream (E.streamUtf32BE onErr bs)
{-# INLINE decodeUtf32BEWith #-}
-- | Decode text from big endian UTF-32 encoding.
+--
+-- If the input contains any invalid big endian UTF-32 data, an
+-- exception will be thrown. For more control over the handling of
+-- invalid data, use 'decodeUtf32BEWith'.
decodeUtf32BE :: B.ByteString -> Text
decodeUtf32BE = decodeUtf32BEWith strictDecode
{-# INLINE decodeUtf32BE #-}
View
21 Data/Text/Lazy/IO.hs
@@ -9,9 +9,15 @@
-- Portability : GHC
--
-- Efficient locale-sensitive support for lazy text I\/O.
+--
+-- Skip past the synopsis for some important notes on performance and
+-- portability across different versions of GHC.
module Data.Text.Lazy.IO
(
+ -- * Performance
+ -- $performance
+
-- * Locale support
-- $locale
-- * File-at-a-time operations
@@ -58,6 +64,21 @@ import System.IO.Error (isEOFError)
import System.IO.Unsafe (unsafeInterleaveIO)
#endif
+-- $performance
+--
+-- The functions in this module obey the runtime system's locale,
+-- character set encoding, and line ending conversion settings.
+--
+-- If you know in advance that you will be working with data that has
+-- a specific encoding (e.g. UTF-8), and your application is highly
+-- performance sensitive, you may find that it is faster to perform
+-- I\/O with bytestrings and to encode and decode yourself than to use
+-- the functions in this module.
+--
+-- Whether this will hold depends on the version of GHC you are using,
+-- the platform you are working on, the data you are working with, and
+-- the encodings you are using, so be sure to test for yourself.
+
-- | Read a file and return its contents as a string. The file is
-- read lazily, as with 'getContents'.
readFile :: FilePath -> IO Text
View
6 Data/Text/Lazy/Read.hs
@@ -39,7 +39,8 @@ type Reader a = Text -> Either String (a,Text)
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
--- incorrect results.
+-- incorrect results. If you are worried about overflow, use
+-- 'Integer' for your result type.
decimal :: Integral a => Reader a
{-# SPECIALIZE decimal :: Reader Int #-}
{-# SPECIALIZE decimal :: Reader Integer #-}
@@ -59,7 +60,8 @@ decimal txt
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
--- incorrect results.
+-- incorrect results. If you are worried about overflow, use
+-- 'Integer' for your result type.
hexadecimal :: Integral a => Reader a
{-# SPECIALIZE hexadecimal :: Reader Int #-}
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
View
8 Data/Text/Read.hs
@@ -39,7 +39,8 @@ type Reader a = Text -> Either String (a,Text)
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
--- incorrect results.
+-- incorrect results. If you are worried about overflow, use
+-- 'Integer' for your result type.
decimal :: Integral a => Reader a
{-# SPECIALIZE decimal :: Reader Int #-}
{-# SPECIALIZE decimal :: Reader Integer #-}
@@ -59,7 +60,8 @@ decimal txt
--
-- /Note/: For fixed-width integer types, this function does not
-- attempt to detect overflow, so a sufficiently long input may give
--- incorrect results.
+-- incorrect results. If you are worried about overflow, use
+-- 'Integer' for your result type.
hexadecimal :: Integral a => Reader a
{-# SPECIALIZE hexadecimal :: Reader Int #-}
{-# SPECIALIZE hexadecimal :: Reader Integer #-}
@@ -99,7 +101,7 @@ signed f = runP (signa (P f))
-- by the 'read' function, with the exception that a trailing @\'.\'@
-- or @\'e\'@ /not/ followed by a number is not consumed.
--
--- Examples:
+-- Examples (with behaviour identical to 'read'):
--
-- >rational "3" == Right (3.0, "")
-- >rational "3.1" == Right (3.1, "")
Please sign in to comment.
Something went wrong with that request. Please try again.