diff --git a/src/Data/Text/IO.hs b/src/Data/Text/IO.hs index ad8389d7..a4e5b2d2 100644 --- a/src/Data/Text/IO.hs +++ b/src/Data/Text/IO.hs @@ -13,8 +13,11 @@ -- The functions in this module obey the runtime system's locale, -- character set encoding, and line ending conversion settings. -- +-- If you want to do I\/O using the UTF-8 encoding, use "Data.Text.IO.Utf8", +-- which is faster than this module. +-- -- If you know in advance that you will be working with data that has --- a specific encoding (e.g. UTF-8), and your application is highly +-- a specific encoding, and your application is highly -- performance sensitive, you may find that it is faster to perform -- I\/O with bytestrings and to encode and decode yourself than to use -- the functions in this module. diff --git a/src/Data/Text/IO/Utf8.hs b/src/Data/Text/IO/Utf8.hs new file mode 100644 index 00000000..2d0cd495 --- /dev/null +++ b/src/Data/Text/IO/Utf8.hs @@ -0,0 +1,93 @@ +-- | +-- Module : Data.Text.IO.Utf8 +-- License : BSD-style +-- Portability : GHC +-- +-- Efficient UTF-8 support for text I\/O. +-- Unlike "Data.Text.IO", these functions do not depend on the locale +-- and do not do line ending conversion. +module Data.Text.IO.Utf8 + ( + -- * File-at-a-time operations + readFile + , writeFile + , appendFile + -- * Operations on handles + , hGetContents + , hGetLine + , hPutStr + , hPutStrLn + -- * Special cases for standard input and output + , interact + , getContents + , getLine + , putStr + , putStrLn + ) where + +import Prelude hiding (readFile, writeFile, appendFile, interact, getContents, getLine, putStr, putStrLn) +import Control.Exception (evaluate) +import Control.Monad ((<=<)) +import Data.ByteString (ByteString) +import qualified Data.ByteString as B +import Data.Text (Text) +import Data.Text.Encoding (decodeUtf8, encodeUtf8) +import GHC.IO.Handle (Handle) +import qualified Data.ByteString.Char8 as B.Char8 + +decodeUtf8IO :: ByteString -> IO Text +decodeUtf8IO = evaluate . 
decodeUtf8 + +-- | The 'readFile' function reads a file and returns the contents of +-- the file as a string. The entire file is read strictly, as with +-- 'getContents'. +readFile :: FilePath -> IO Text +readFile = decodeUtf8IO <=< B.readFile + +-- | Write a string to a file. The file is truncated to zero length +-- before writing begins. +writeFile :: FilePath -> Text -> IO () +writeFile fp = B.writeFile fp . encodeUtf8 + +-- | Write a string to the end of a file. +appendFile :: FilePath -> Text -> IO () +appendFile fp = B.appendFile fp . encodeUtf8 + +-- | Read the remaining contents of a 'Handle' as a string. +hGetContents :: Handle -> IO Text +hGetContents = decodeUtf8IO <=< B.hGetContents + +-- | Read a single line from a handle. +hGetLine :: Handle -> IO Text +hGetLine = decodeUtf8IO <=< B.hGetLine + +-- | Write a string to a handle. +hPutStr :: Handle -> Text -> IO () +hPutStr h = B.hPutStr h . encodeUtf8 + +-- | Write a string to a handle, followed by a newline. +hPutStrLn :: Handle -> Text -> IO () +hPutStrLn h t = hPutStr h t >> B.hPutStr h (B.Char8.singleton '\n') + +-- | The 'interact' function takes a function of type @Text -> Text@ +-- as its argument. The entire input from the standard input device is +-- passed to this function as its argument, and the resulting string +-- is output on the standard output device. +interact :: (Text -> Text) -> IO () +interact f = putStr . f =<< getContents + +-- | Read all user input on 'stdin' as a single string. +getContents :: IO Text +getContents = decodeUtf8IO =<< B.getContents + +-- | Read a single line of user input from 'stdin'. +getLine :: IO Text +getLine = decodeUtf8IO =<< B.getLine + +-- | Write a string to 'stdout'. +putStr :: Text -> IO () +putStr = B.putStr . encodeUtf8 + +-- | Write a string to 'stdout', followed by a newline. 
+putStrLn :: Text -> IO () +putStrLn t = B.putStr (encodeUtf8 t) >> B.putStr (B.Char8.singleton '\n') diff --git a/tests/Tests/Properties/LowLevel.hs b/tests/Tests/Properties/LowLevel.hs index c3b0a605..5a7c7ceb 100644 --- a/tests/Tests/Properties/LowLevel.hs +++ b/tests/Tests/Properties/LowLevel.hs @@ -31,6 +31,7 @@ import qualified Data.Text as T import qualified Data.Text.IO as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.IO as TL +import qualified Data.Text.IO.Utf8 as TU import qualified System.IO as IO #ifdef MIN_VERSION_tasty_inspection_testing @@ -107,6 +108,9 @@ t_write_read_line m b t = write_read (T.concat . take 1) T.filter T.hPutStrLn tl_write_read_line m b t = write_read (TL.concat . take 1) TL.filter TL.hPutStrLn TL.hGetLine m b [t] +utf8_write_read = write_read T.unlines T.filter TU.hPutStr TU.hGetContents +utf8_write_read_line m b t = write_read (T.concat . take 1) T.filter TU.hPutStrLn + TU.hGetLine m b [t] testLowLevel :: TestTree testLowLevel = @@ -142,7 +146,9 @@ testLowLevel = testProperty "t_write_read" t_write_read, testProperty "tl_write_read" tl_write_read, testProperty "t_write_read_line" t_write_read_line, - testProperty "tl_write_read_line" tl_write_read_line + testProperty "tl_write_read_line" tl_write_read_line, + testProperty "utf8_write_read" utf8_write_read, + testProperty "utf8_write_read_line" utf8_write_read_line -- These tests are subject to I/O race conditions -- testProperty "t_put_get" t_put_get, -- testProperty "tl_put_get" tl_put_get diff --git a/text.cabal b/text.cabal index 05946247..9d340387 100644 --- a/text.cabal +++ b/text.cabal @@ -143,6 +143,7 @@ library Data.Text.Encoding.Error Data.Text.Foreign Data.Text.IO + Data.Text.IO.Utf8 Data.Text.Internal Data.Text.Internal.Builder Data.Text.Internal.Builder.Functions