Navigation Menu

Skip to content

Commit

Permalink
Transplant UTF-8 decoding benchmarks as of 44d20dca8f35
Browse files Browse the repository at this point in the history
  • Loading branch information
bos committed Jul 10, 2011
1 parent 9dec3d6 commit 6a4ea93
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 4 deletions.
6 changes: 5 additions & 1 deletion tests/benchmarks/src/Data/Text/Benchmarks.hs
Expand Up @@ -39,7 +39,11 @@ benchmarks = do
-- Traditional benchmarks
bs <- sequence
[ Builder.benchmark
, DecodeUtf8.benchmark (tf "russian.txt")
, DecodeUtf8.benchmark "html" (tf "libya-chinese.html")
, DecodeUtf8.benchmark "xml" (tf "yiwiki.xml")
, DecodeUtf8.benchmark "ascii" (tf "ascii.txt")
, DecodeUtf8.benchmark "russian" (tf "russian.txt")
, DecodeUtf8.benchmark "japanese" (tf "japanese.txt")
, EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯"
, Equality.benchmark (tf "japanese.txt")
, FileRead.benchmark (tf "russian.txt")
Expand Down
22 changes: 19 additions & 3 deletions tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs
@@ -1,3 +1,5 @@
{-# LANGUAGE ForeignFunctionInterface #-}

-- | Test decoding of UTF-8
--
-- Tested in this benchmark:
Expand All @@ -16,7 +18,13 @@ module Data.Text.Benchmarks.DecodeUtf8
( benchmark
) where

import Criterion (Benchmark, bgroup, bench, nf)
import Foreign.C.Types (CInt, CSize)
import Data.ByteString.Internal (ByteString(..))
import Foreign.Ptr (Ptr, plusPtr)
import Foreign.ForeignPtr (withForeignPtr)
import Data.Word (Word8)
import qualified Criterion as C
import Criterion (Benchmark, bgroup, nf)
import qualified Codec.Binary.UTF8.Generic as U8
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
Expand All @@ -25,12 +33,14 @@ import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL

benchmark :: FilePath -> IO Benchmark
benchmark fp = do
benchmark :: String -> FilePath -> IO Benchmark
benchmark kind fp = do
bs <- B.readFile fp
lbs <- BL.readFile fp
let bench name = C.bench (name ++ "+" ++ kind)
return $ bgroup "DecodeUtf8"
[ bench "Strict" $ nf T.decodeUtf8 bs
, bench "IConv" $ iconv bs
, bench "StrictLength" $ nf (T.length . T.decodeUtf8) bs
, bench "StrictInitLength" $ nf (T.length . T.init . T.decodeUtf8) bs
, bench "Lazy" $ nf TL.decodeUtf8 lbs
Expand All @@ -41,3 +51,9 @@ benchmark fp = do
, bench "LazyStringUtf8" $ nf U8.toString lbs
, bench "LazyStringUtf8Length" $ nf (length . U8.toString) lbs
]

iconv :: ByteString -> IO CInt
iconv (PS fp off len) = withForeignPtr fp $ \ptr ->
time_iconv (ptr `plusPtr` off) (fromIntegral len)

foreign import ccall unsafe time_iconv :: Ptr Word8 -> CSize -> IO CInt
1 change: 1 addition & 0 deletions tests/benchmarks/text-benchmarks.cabal
Expand Up @@ -18,6 +18,7 @@ cabal-version: >=1.2
executable text-benchmarks
hs-source-dirs: src ../..
c-sources: ../../cbits/cbits.c
cbits/time_iconv.c
main-is: Data/Text/Benchmarks.hs
ghc-options: -Wall -O2
cpp-options: -DHAVE_DEEPSEQ
Expand Down

0 comments on commit 6a4ea93

Please sign in to comment.