{-# OPTIONS_GHC -Wall -fwarn-tabs #-}
{-# LANGUAGE ForeignFunctionInterface #-}
----------------------------------------------------------------
-- 2010.10.09
-- |
-- Module : IsSpace
-- Copyright : Copyright (c) 2010 wren ng thornton
-- License : BSD
-- Maintainer : wren@community.haskell.org
-- Stability : experimental
-- Portability : portable (FFI)
--
-- A benchmark for comparing different definitions of predicates
-- for detecting whitespace. As of the last run the results are:
--
-- * Data.Char.isSpace : 14.44786 us +/- 258.0377 ns
-- * isSpace_DataChar : 43.25154 us +/- 655.7037 ns
-- * isSpace_Char : 29.26598 us +/- 454.1445 ns
-- * isPerlSpace :
-- * Data.Attoparsec.Char8.isSpace : 81.87335 us +/- 1.195903 us
-- * isSpace_Char8 : 11.84677 us +/- 178.9795 ns
-- * isSpace_w8 : 11.55470 us +/- 133.7644 ns
----------------------------------------------------------------
module IsSpace (main) where
import qualified Data.Char as C
import Data.Word (Word8)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Foreign.C.Types (CInt)
import Criterion (bench, nf)
import Criterion.Main (defaultMain)
----------------------------------------------------------------
----- Character predicates
-- N.B. \x9..\xD == "\t\n\v\f\r"
-- | Whitespace predicate intended to mirror Perl's @/\\s/@ in Unicode
-- mode (per the original author's description).
--
-- Accepts exactly the following characters:
--
-- * the ASCII space @\' \'@;
--
-- * the control range @\'\\t\'@..@\'\\r\'@ (i.e. @\\x9@..@\\xD@) with
--   the vertical tab @\'\\v\'@ explicitly excluded;
--
-- * @\'\\x85\'@, @\'\\x2028\'@ and @\'\\x2029\'@.
--
-- Note that @\'\\x85\'@ lies in Latin-1 but outside ASCII, so it is
-- not part of POSIX 1003.2 @[[:space:]]@.
isPerlSpace :: Char -> Bool
isPerlSpace c
    =  (' ' == c)
    -- One range check covers "\t\n\v\f\r"; '\v' is then carved out.
    || ('\t' <= c && c <= '\r' && c /= '\v')
    || ('\x85' == c)
    || ('\x2028' == c)
    || ('\x2029' == c)
{-# INLINE isPerlSpace #-}
-- | 'Data.Attoparsec.Char8.isSpace', duplicated here because it's
-- not exported. This is the definition as of attoparsec-0.8.1.0.
--
-- The predicate is a membership test of the character in a packed
-- 'B8.ByteString' holding the six characters @" \\n\\r\\t\\v\\f"@.
-- The logic is kept verbatim on purpose: the benchmark measures this
-- exact formulation.
isSpace :: Char -> Bool
isSpace c = c `B8.elem` spaces
  where
    -- The lookup table; NOINLINE so it is not duplicated into each
    -- inlined copy of 'isSpace'.
    spaces = B8.pack " \n\r\t\v\f"
    {-# NOINLINE spaces #-}
{-# INLINE isSpace #-}
-- | A comparison-based alternative to 'Data.Attoparsec.Char8.isSpace'.
--
-- Accepts the ASCII space and the contiguous control range
-- @\'\\t\'@..@\'\\r\'@ (@\\x9@..@\\xD@, i.e. @"\\t\\n\\v\\f\\r"@); every
-- other character is rejected.
isSpace_Char8 :: Char -> Bool
isSpace_Char8 c
    | c == ' '  = True
    | otherwise = c >= '\t' && c <= '\r'
{-# INLINE isSpace_Char8 #-}
-- | An alternate version of 'Data.Char.isSpace'. It uses the same
-- range-comparison trick as 'isSpace_Char8', but additionally accepts
-- @\'\\xA0\'@ and anything the foreign @iswspace@ binding reports as
-- whitespace. The intent is to return the same results as
-- 'Data.Char.isSpace' (whereas 'isSpace_Char8' doesn't recognize
-- Unicode whitespace).
isSpace_Char :: Char -> Bool
isSpace_Char c
    =  (' ' == c)
    || ('\t' <= c && c <= '\r')
    || ('\xA0' == c)
    -- Fall back to the C classifier; nonzero means "is whitespace".
    || (iswspace (fromIntegral (C.ord c)) /= 0)
{-# INLINE isSpace_Char #-}
-- | Binding to the C symbol @u_iswspace@, imported as a pure function
-- via an @unsafe@ foreign call. It takes a code point as a 'CInt';
-- callers in this module treat a nonzero result as "is whitespace".
--
-- NOTE(review): presumably this is the classifier shipped with the
-- base library's C sources -- confirm the symbol still exists for the
-- GHC version in use.
foreign import ccall unsafe "u_iswspace"
    iswspace :: CInt -> CInt
-- | Verbatim version of 'Data.Char.isSpace' (i.e., 'GHC.Unicode.isSpace'
-- as of base-4.2.0.2), kept in order to investigate why 'isSpace_Char'
-- is slower than 'Data.Char.isSpace'. It appears to be something
-- special in how the base library was compiled.
--
-- The chain of equality tests is deliberately left as-is rather than
-- collapsed into a range check, since the point is to benchmark this
-- exact formulation.
isSpace_DataChar :: Char -> Bool
isSpace_DataChar c =
    c == ' '     ||
    c == '\t'    ||
    c == '\n'    ||
    c == '\r'    ||
    c == '\f'    ||
    c == '\v'    ||
    c == '\xa0'  ||
    -- Anything else is delegated to the C classifier (nonzero = space).
    iswspace (fromIntegral (C.ord c)) /= 0
{-# INLINE isSpace_DataChar #-}
-- | A 'Word8' counterpart of 'Data.Attoparsec.Char8.isSpace'.
--
-- Accepts byte @0x20@ (space) and the range @0x09@..@0x0D@
-- (@"\\t\\n\\v\\f\\r"@); all other bytes are rejected.
isSpace_w8 :: Word8 -> Bool
isSpace_w8 w
    | w == 0x20 = True
    | otherwise = w >= 0x09 && w <= 0x0D
{-# INLINE isSpace_w8 #-}
----------------------------------------------------------------
-- | Benchmark entry point: runs every whitespace predicate in this
-- module over the same input and fully evaluates (@nf@) the resulting
-- list of 'Bool's.
--
-- The 'Char' predicates are mapped over the code points 0..255 and the
-- 'Word8' predicate over the bytes 0..255, so code points above 255
-- (e.g. @\'\\x2028\'@) are not exercised by these benchmarks.
main :: IO ()
main = defaultMain
    [ bench "Data.Char.isSpace" $ nf (map C.isSpace) latin1Chars
    , bench "isSpace_DataChar" $ nf (map isSpace_DataChar) latin1Chars
    , bench "isSpace_Char" $ nf (map isSpace_Char) latin1Chars
    , bench "isPerlSpace" $ nf (map isPerlSpace) latin1Chars
    , bench "Data.Attoparsec.Char8.isSpace"
        $ nf (map isSpace) latin1Chars
    , bench "isSpace_Char8" $ nf (map isSpace_Char8) latin1Chars
    , bench "isSpace_w8" $ nf (map isSpace_w8) allBytes
    ]
  where
    -- Shared input for the 'Char' predicates; '\xFF' is the same
    -- character as the decimal escape '\255'.
    latin1Chars :: [Char]
    latin1Chars = ['\x00' .. '\xFF']

    -- Shared input for the 'Word8' predicate.
    allBytes :: [Word8]
    allBytes = [0 .. 255]
----------------------------------------------------------------
----------------------------------------------------------- fin.