Skip to content
Browse files

added IsSpace.hs benchmark (forgot it in the prev patch)

  • Loading branch information...
1 parent a0dc751 commit d2ac8a7739657f338aa22baab7027d60050b47e3 @wrengr wrengr committed Oct 10, 2010
Showing with 123 additions and 0 deletions.
  1. +123 −0 benchmarks/IsSpace.hs
View
123 benchmarks/IsSpace.hs
@@ -0,0 +1,123 @@
+{-# OPTIONS_GHC -Wall -fwarn-tabs #-}
+{-# LANGUAGE ForeignFunctionInterface #-}
+----------------------------------------------------------------
+-- 2010.10.09
+-- |
+-- Module : IsSpace
+-- Copyright : Copyright (c) 2010 wren ng thornton
+-- License : BSD
+-- Maintainer : wren@community.haskell.org
+-- Stability : experimental
+-- Portability : portable (FFI)
+--
+-- A benchmark for comparing different definitions of predicates
+-- for detecting whitespace. As of the last run the results are:
+--
+-- * Data.Char.isSpace : 14.44786 us +/- 258.0377 ns
+-- * isSpace_DataChar : 43.25154 us +/- 655.7037 ns
+-- * isSpace_Char : 29.26598 us +/- 454.1445 ns
+-- * isPerlSpace : (no result recorded)
+-- * Data.Attoparsec.Char8.isSpace : 81.87335 us +/- 1.195903 us
+-- * isSpace_Char8 : 11.84677 us +/- 178.9795 ns
+-- * isSpace_w8 : 11.55470 us +/- 133.7644 ns
+----------------------------------------------------------------
+module IsSpace (main) where
+
+import qualified Data.Char as C
+import Data.Word (Word8)
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Char8 as B8
+import Foreign.C.Types (CInt)
+
+import Criterion (bench, nf)
+import Criterion.Main (defaultMain)
+
+----------------------------------------------------------------
+----- Character predicates
+-- N.B. \x9..\xD == "\t\n\v\f\r"
+
+-- | Whitespace predicate modelled on Perl's @/\s/@ in Unicode mode.
+-- Accepts the space character, every member of POSIX 1003.2
+-- @[[:space:]]@ other than @\'\v\'@ (so @\'\t\'@, @\'\n\'@, @\'\f\'@
+-- and @\'\r\'@), plus the Unicode code points @\'\x85\'@,
+-- @\'\x2028\'@ and @\'\x2029\'@. Because @\'\x85\'@ lies in Latin-1
+-- rather than ASCII, it is outside POSIX 1003.2 @[[:space:]]@ and
+-- outside non-Unicode @/\s/@.
+--
+-- NOTE(review): only the code points listed above are accepted;
+-- confirm against perlrecharclass whether Unicode-mode @/\s/@ covers
+-- more (e.g. @\'\xA0\'@).
+isPerlSpace :: Char -> Bool
+isPerlSpace c = case c of
+    ' '      -> True
+    '\v'     -> False   -- the one [[:space:]] member that is excluded
+    '\x85'   -> True
+    '\x2028' -> True
+    '\x2029' -> True
+    _        -> c >= '\t' && c <= '\r'
+{-# INLINE isPerlSpace #-}
+
+
+-- | 'Data.Attoparsec.Char8.isSpace', duplicated here because it's
+-- not exported. This is the definition as of attoparsec-0.8.1.0.
+--
+-- Tests membership of @c@ in a packed 'B8.ByteString' holding the
+-- six characters space, @\'\n\'@, @\'\r\'@, @\'\t\'@, @\'\v\'@ and
+-- @\'\f\'@. Kept as a copy of the library definition so that the
+-- benchmark entry of the same name measures that definition; do not
+-- restructure it.
+isSpace :: Char -> Bool
+isSpace c = c `B8.elem` spaces
+ where
+ -- The lookup table; the NOINLINE pragma below applies to it.
+ spaces = B8.pack " \n\r\t\v\f"
+ {-# NOINLINE spaces #-}
+{-# INLINE isSpace #-}
+
+
+-- | A comparison-based alternative to 'Data.Attoparsec.Char8.isSpace':
+-- true for the space character and for anything in the contiguous
+-- range @\'\t\'@ .. @\'\r\'@ (i.e., @\"\t\n\v\f\r\"@). No Unicode
+-- whitespace is recognized.
+isSpace_Char8 :: Char -> Bool
+isSpace_Char8 c
+    | c == ' '  = True
+    | otherwise = c >= '\t' && c <= '\r'
+{-# INLINE isSpace_Char8 #-}
+
+
+-- | An alternative to 'Data.Char.isSpace'. It applies the same range
+-- comparison as 'isSpace_Char8' to the ASCII whitespace characters,
+-- then additionally accepts @\'\xA0\'@ and whatever 'iswspace' reports
+-- as whitespace, so that (unlike 'isSpace_Char8') Unicode whitespace
+-- is recognized and the results agree with 'Data.Char.isSpace'.
+isSpace_Char :: Char -> Bool
+isSpace_Char c
+    | c == ' '               = True
+    | c >= '\t' && c <= '\r' = True
+    | c == '\xA0'            = True
+    -- Everything else is decided by the C routine; nonzero means whitespace.
+    | otherwise              = iswspace (fromIntegral (C.ord c)) /= 0
+{-# INLINE isSpace_Char #-}
+
+-- | Binding to the C symbol @u_iswspace@, imported as an @unsafe@
+-- @ccall@. Both 'isSpace_Char' and 'isSpace_DataChar' treat a nonzero
+-- result as \"is whitespace\".
+-- NOTE(review): presumably the same routine that base's own
+-- 'Data.Char.isSpace' falls back to; confirm against GHC.Unicode.
+foreign import ccall unsafe "u_iswspace"
+ iswspace :: CInt -> CInt
+
+-- | Verbatim version of 'Data.Char.isSpace' (i.e., 'GHC.Unicode.isSpace'
+-- as of base-4.2.0.2) in order to try to figure out why 'isSpace_Char'
+-- is slower than 'Data.Char.isSpace'. It appears to be something
+-- special in how the base library was compiled.
+--
+-- Checks the six ASCII whitespace characters and @\'\xa0\'@ one
+-- equality at a time, then falls back to 'iswspace' on the code
+-- point. Because this is meant to be a verbatim copy, the expression
+-- should be left in exactly this form.
+isSpace_DataChar :: Char -> Bool
+isSpace_DataChar c =
+ c == ' ' ||
+ c == '\t' ||
+ c == '\n' ||
+ c == '\r' ||
+ c == '\f' ||
+ c == '\v' ||
+ c == '\xa0' ||
+ -- Anything else is decided by the C routine; nonzero means whitespace.
+ iswspace (fromIntegral (C.ord c)) /= 0
+{-# INLINE isSpace_DataChar #-}
+
+
+-- | Byte-level ('Word8') counterpart of
+-- 'Data.Attoparsec.Char8.isSpace': true for 32 (space) and for the
+-- bytes 9 through 13 (@\"\t\n\v\f\r\"@).
+isSpace_w8 :: Word8 -> Bool
+isSpace_w8 w
+    | w == 32   = True
+    | otherwise = w >= 9 && w <= 13
+{-# INLINE isSpace_w8 #-}
+
+----------------------------------------------------------------
+
+-- | Benchmark entry point. Each 'Char' predicate is mapped over all
+-- characters from @\'\x0\'@ to @\'\255\'@ (decimal 255) and the
+-- 'Word8' predicate over the bytes 0 to 255; every result list is
+-- forced to normal form with 'nf'.
+main :: IO ()
+main = defaultMain
+    [ overChars "Data.Char.isSpace" C.isSpace
+    , overChars "isSpace_DataChar" isSpace_DataChar
+    , overChars "isSpace_Char" isSpace_Char
+    , overChars "isPerlSpace" isPerlSpace
+    , overChars "Data.Attoparsec.Char8.isSpace" isSpace
+    , overChars "isSpace_Char8" isSpace_Char8
+    , bench "isSpace_w8" (nf (map isSpace_w8) [0..255])
+    ]
+  where
+    -- One benchmark running a Char predicate over the 256 characters.
+    overChars label p = bench label (nf (map p) ['\x0'..'\255'])
+
+----------------------------------------------------------------
+----------------------------------------------------------- fin.

0 comments on commit d2ac8a7

Please sign in to comment.
Something went wrong with that request. Please try again.