Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Fetching contributors…

Cannot retrieve contributors at this time

124 lines (106 sloc) 4.609 kb
{-# OPTIONS_GHC -Wall -fwarn-tabs #-}
{-# LANGUAGE ForeignFunctionInterface #-}
----------------------------------------------------------------
-- 2010.10.09
-- |
-- Module : IsSpace
-- Copyright : Copyright (c) 2010 wren ng thornton
-- License : BSD
-- Maintainer : wren@community.haskell.org
-- Stability : experimental
-- Portability : portable (FFI)
--
-- A benchmark for comparing different definitions of predicates
-- for detecting whitespace. As of the last run the results are:
--
-- * Data.Char.isSpace : 14.44786 us +/- 258.0377 ns
-- * isSpace_DataChar : 43.25154 us +/- 655.7037 ns
-- * isSpace_Char : 29.26598 us +/- 454.1445 ns
-- * isPerlSpace : (no timing recorded)
-- * Data.Attoparsec.Char8.isSpace : 81.87335 us +/- 1.195903 us
-- * isSpace_Char8 : 11.84677 us +/- 178.9795 ns
-- * isSpace_w8 : 11.55470 us +/- 133.7644 ns
----------------------------------------------------------------
module IsSpace (main) where
import qualified Data.Char as C
import Data.Word (Word8)
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import Foreign.C.Types (CInt)
import Criterion (bench, nf)
import Criterion.Main (defaultMain)
----------------------------------------------------------------
----- Character predicates
-- N.B. \x9..\xD == "\t\n\v\f\r"
-- | Whitespace predicate that mirrors Perl's @/\s/@ in Unicode mode.
-- It accepts POSIX 1003.2 @[[:space:]]@ minus @\'\v\'@, and in
-- addition the Unicode characters @\'\x85\'@, @\'\x2028\'@ and
-- @\'\x2029\'@. Because @\'\x85\'@ lies in Latin-1 but outside ASCII,
-- it is part of neither POSIX 1003.2 @[[:space:]]@ nor the
-- non-Unicode @/\s/@.
isPerlSpace :: Char -> Bool
isPerlSpace c
    | c == ' '                             = True
    -- '\t'..'\r' is the contiguous run "\t\n\v\f\r"; '\v' is carved out.
    | c >= '\t' && c <= '\r' && c /= '\v'  = True
    | otherwise = c == '\x85' || c == '\x2028' || c == '\x2029'
{-# INLINE isPerlSpace #-}
-- | A local copy of 'Data.Attoparsec.Char8.isSpace', reproduced here
-- because the original is not exported. This follows the definition
-- found in attoparsec-0.8.1.0: membership of the character in a
-- packed table of the six ASCII whitespace characters.
isSpace :: Char -> Bool
isSpace c = B8.elem c asciiSpaces
  where
    -- Lookup table of accepted characters.
    -- NOTE(review): presumably NOINLINE keeps the packed table from
    -- being duplicated at every inlined use of 'isSpace' -- confirm.
    asciiSpaces = B8.pack " \n\r\t\v\f"
    {-# NOINLINE asciiSpaces #-}
{-# INLINE isSpace #-}
-- | A table-free alternative to 'Data.Attoparsec.Char8.isSpace':
-- accepts @\' \'@ and anything in the contiguous range
-- @\'\t\'@..@\'\r\'@ (i.e. @\"\t\n\v\f\r\"@). No other characters,
-- Unicode whitespace included, are recognized.
isSpace_Char8 :: Char -> Bool
isSpace_Char8 c = c == ' ' || inTabToCR
  where
    -- Only evaluated when the character is not a plain space.
    inTabToCR = c >= '\t' && c <= '\r'
{-# INLINE isSpace_Char8 #-}
-- | An alternative to 'Data.Char.isSpace'. It applies the same
-- range-comparison trick as 'isSpace_Char8' for the ASCII cases, then
-- additionally accepts @\'\xA0\'@ and whatever the foreign 'iswspace'
-- reports as whitespace, so that it agrees with 'Data.Char.isSpace'
-- (unlike 'isSpace_Char8', which ignores Unicode whitespace).
isSpace_Char :: Char -> Bool
isSpace_Char c
    | c == ' '                = True
    | c >= '\t' && c <= '\r'  = True
    | c == '\xA0'             = True
    -- Fall back to the C predicate, passing the code point as a CInt;
    -- any nonzero answer counts as whitespace.
    | otherwise               = iswspace (fromIntegral (C.ord c)) /= 0
{-# INLINE isSpace_Char #-}
-- | FFI binding to the C symbol @u_iswspace@, imported as a pure,
-- @unsafe@ call. Callers in this module pass a character's code point
-- (via 'C.ord') and treat any nonzero result as whitespace.
--
-- NOTE(review): presumably this is the C helper that base's own
-- 'Data.Char.isSpace' relied on at the time; confirm the symbol is
-- still provided by the base version being linked against.
foreign import ccall unsafe "u_iswspace"
    iswspace :: CInt -> CInt
-- | Verbatim copy of 'Data.Char.isSpace' (i.e., 'GHC.Unicode.isSpace'
-- as of base-4.2.0.2), kept in order to investigate why 'isSpace_Char'
-- is slower than 'Data.Char.isSpace'. The difference appears to come
-- from something special in how the base library was compiled.
--
-- Because this is meant to be a verbatim copy, the body is
-- intentionally left in its original form: each ASCII whitespace
-- character and @\'\xa0\'@ is compared individually, and everything
-- else is delegated to the foreign 'iswspace' (nonzero result means
-- whitespace).
isSpace_DataChar :: Char -> Bool
isSpace_DataChar c =
    c == ' ' ||
    c == '\t' ||
    c == '\n' ||
    c == '\r' ||
    c == '\f' ||
    c == '\v' ||
    c == '\xa0' ||
    iswspace (fromIntegral (C.ord c)) /= 0
{-# INLINE isSpace_DataChar #-}
-- | Byte-level counterpart of 'Data.Attoparsec.Char8.isSpace',
-- operating directly on a 'Word8'. Accepts 32 (space) and the range
-- 9..13, which corresponds to @\"\t\n\v\f\r\"@.
isSpace_w8 :: Word8 -> Bool
isSpace_w8 w
    | w == 32    = True
    | otherwise  = w >= 9 && w <= 13
{-# INLINE isSpace_w8 #-}
----------------------------------------------------------------
-- | Benchmark entry point. Each predicate is mapped over every value
-- in 0..255 (as 'Char's, or as 'Word8's for 'isSpace_w8') and the
-- resulting list is forced to normal form by Criterion.
main :: IO ()
main = defaultMain
    [ bench "Data.Char.isSpace" (nf (map C.isSpace) latin1Chars)
    , bench "isSpace_DataChar"  (nf (map isSpace_DataChar) latin1Chars)
    , bench "isSpace_Char"      (nf (map isSpace_Char) latin1Chars)
    , bench "isPerlSpace"       (nf (map isPerlSpace) latin1Chars)
    , bench "Data.Attoparsec.Char8.isSpace"
                                (nf (map isSpace) latin1Chars)
    , bench "isSpace_Char8"     (nf (map isSpace_Char8) latin1Chars)
    , bench "isSpace_w8"        (nf (map isSpace_w8) allBytes)
    ]
  where
    -- Input shared by all the Char-based benchmarks: code points 0..255.
    latin1Chars :: String
    latin1Chars = ['\x0'..'\255']

    -- Input for the Word8-based benchmark: every byte value.
    allBytes :: [Word8]
    allBytes = [0..255]
----------------------------------------------------------------
----------------------------------------------------------- fin.
Jump to Line
Something went wrong with that request. Please try again.