diff --git a/tests/benchmarks/src/Data/Text/Benchmarks.hs b/tests/benchmarks/src/Data/Text/Benchmarks.hs index 09916bf0..3a40012f 100644 --- a/tests/benchmarks/src/Data/Text/Benchmarks.hs +++ b/tests/benchmarks/src/Data/Text/Benchmarks.hs @@ -39,7 +39,11 @@ benchmarks = do -- Traditional benchmarks bs <- sequence [ Builder.benchmark - , DecodeUtf8.benchmark (tf "russian.txt") + , DecodeUtf8.benchmark "html" (tf "libya-chinese.html") + , DecodeUtf8.benchmark "xml" (tf "yiwiki.xml") + , DecodeUtf8.benchmark "ascii" (tf "ascii.txt") + , DecodeUtf8.benchmark "russian" (tf "russian.txt") + , DecodeUtf8.benchmark "japanese" (tf "japanese.txt") , EncodeUtf8.benchmark "επανάληψη 竺法蘭共譯" , Equality.benchmark (tf "japanese.txt") , FileRead.benchmark (tf "russian.txt") diff --git a/tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs b/tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs index 8acc7280..474b75e1 100644 --- a/tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs +++ b/tests/benchmarks/src/Data/Text/Benchmarks/DecodeUtf8.hs @@ -1,3 +1,5 @@ +{-# LANGUAGE ForeignFunctionInterface #-} + -- | Test decoding of UTF-8 -- -- Tested in this benchmark: @@ -16,7 +18,13 @@ module Data.Text.Benchmarks.DecodeUtf8 ( benchmark ) where -import Criterion (Benchmark, bgroup, bench, nf) +import Foreign.C.Types (CInt, CSize) +import Data.ByteString.Internal (ByteString(..)) +import Foreign.Ptr (Ptr, plusPtr) +import Foreign.ForeignPtr (withForeignPtr) +import Data.Word (Word8) +import qualified Criterion as C +import Criterion (Benchmark, bgroup, nf) import qualified Codec.Binary.UTF8.Generic as U8 import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as BL @@ -25,12 +33,14 @@ import qualified Data.Text.Encoding as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TL -benchmark :: FilePath -> IO Benchmark -benchmark fp = do +benchmark :: String -> FilePath -> IO Benchmark +benchmark kind fp = do bs <- B.readFile fp lbs <- BL.readFile fp + let bench name = C.bench (name ++ "+" ++ kind) return $ bgroup "DecodeUtf8" [ bench "Strict" $ nf T.decodeUtf8 bs + , bench "IConv" $ iconv bs , bench "StrictLength" $ nf (T.length . T.decodeUtf8) bs , bench "StrictInitLength" $ nf (T.length . T.init . T.decodeUtf8) bs , bench "Lazy" $ nf TL.decodeUtf8 lbs @@ -41,3 +51,9 @@ benchmark fp = do , bench "LazyStringUtf8" $ nf U8.toString lbs , bench "LazyStringUtf8Length" $ nf (length . U8.toString) lbs ] + +iconv :: ByteString -> IO CInt +iconv (PS fp off len) = withForeignPtr fp $ \ptr -> + time_iconv (ptr `plusPtr` off) (fromIntegral len) + +foreign import ccall unsafe time_iconv :: Ptr Word8 -> CSize -> IO CInt diff --git a/tests/benchmarks/text-benchmarks.cabal b/tests/benchmarks/text-benchmarks.cabal index ad52af3d..a9b920a6 100644 --- a/tests/benchmarks/text-benchmarks.cabal +++ b/tests/benchmarks/text-benchmarks.cabal @@ -18,6 +18,7 @@ cabal-version: >=1.2 executable text-benchmarks hs-source-dirs: src ../.. c-sources: ../../cbits/cbits.c + cbits/time_iconv.c main-is: Data/Text/Benchmarks.hs ghc-options: -Wall -O2 cpp-options: -DHAVE_DEEPSEQ