Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Drastic performance improvment for uniformR #6

Merged
merged 3 commits into from

2 participants

@Shimuuar
Collaborator
20-40x performance improvements for uniformR for integral times

It looks like GHC fails to specialize uniformRange despite INLINE
pragma. This results in horrible 20-40x slowdown. Adding
SPECIALIZE pragma fixes that

Also accept ranges (a,b) where a<b for integral types. It doesn't produce any
measurable slowdown.

Benchmarks are added as well
Shimuuar and others added some commits
@Shimuuar Shimuuar Add uniformR to benchmarks 43ca7d7
Aleksey 20-40x performance improvements for uniformR for integral times
It looks like GHC fails to specialize uniformRange despite INLINE
pragma. This results in horrible 20-40x slowdown. Adding
SPECIALIZE pragma fixes that

Also accept ranges (a,b) where a<b. It doesn't produce any
measurable slowdown.

fixes #4.
d7fda63
@Shimuuar Shimuuar Fix uniformRange for case where (x1 > x2) 3909677
@Shimuuar
Collaborator

Ping?

@bos bos merged commit 1c1f03a into bos:master
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Sep 28, 2011
  1. @Shimuuar

    Add uniformR to benchmarks

    Shimuuar authored
Commits on Sep 29, 2011
  1. @Shimuuar

    20-40x performance improvements for uniformR for integral times

    Aleksey authored Shimuuar committed
    It looks like GHC fails to specialize uniformRange despite INLINE
    pragma. This results in horrible 20-40x slowdown. Adding
    SPECIALIZE pragma fixes that
    
    Also accept ranges (a,b) where a<b. It doesn't produce any
    measurable slowdown.
    
    fixes #4.
Commits on Sep 30, 2011
  1. @Shimuuar
This page is out of date. Refresh to see the latest.
Showing with 62 additions and 27 deletions.
  1. +26 −15 System/Random/MWC.hs
  2. +36 −12 benchmarks/Benchmark.hs
View
41 System/Random/MWC.hs
@@ -478,21 +478,11 @@ type instance Unsigned Word = Word
-- unsigned data type of same size
sub :: (Integral a, Integral (Unsigned a)) => a -> a -> Unsigned a
sub x y = fromIntegral x - fromIntegral y
+{-# INLINE sub #-}
add :: (Integral a, Integral (Unsigned a)) => a -> Unsigned a -> a
add m x = m + fromIntegral x
-
--- Generate uniform value in the range [0,n). Values must be
--- unsigned. Second parameter is random number generator
-unsignedRange :: (PrimMonad m, Integral a, Bounded a) => a -> m a -> m a
-unsignedRange n rnd = go
- where
- buckets = maxBound `div` n
- maxN = buckets * n
- go = do x <- rnd
- if x < maxN then return (x `div` buckets)
- else go
-{-# INLINE unsignedRange #-}
+{-# INLINE add #-}
-- Generate unformly distributed value in inclusive range.
uniformRange :: ( PrimMonad m
@@ -500,10 +490,31 @@ uniformRange :: ( PrimMonad m
, Integral (Unsigned a), Bounded (Unsigned a), Variate (Unsigned a))
=> (a,a) -> Gen (PrimState m) -> m a
uniformRange (x1,x2) g
- | x1 == minBound && x2 == maxBound = uniform g
- | otherwise = do x <- unsignedRange (sub x2 x1 + 1) (uniform g)
- return $! add x1 x
+ | n == 0 = uniform g -- Abuse overflow in unsigned types
+ | otherwise = loop
+ where
+ -- Allow ranges where x2<x1
+ (# a, b #) | x1 < x2 = (# x1, x2 #)
+ | otherwise = (# x2, x1 #)
+ n = 1 + sub b a
+ buckets = maxBound `div` n
+ maxN = buckets * n
+ loop = do x <- uniform g
+ if x < maxN then return $! add a (x `div` buckets)
+ else loop
{-# INLINE uniformRange #-}
+-- These SPECIALIZE pragmas are crucial for performance. Without them
+-- generic version is used which is 20-40 times slower.
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int, Int) -> Gen (PrimState m) -> m Int #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int8, Int8) -> Gen (PrimState m) -> m Int8 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int16, Int16) -> Gen (PrimState m) -> m Int16 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int32, Int32) -> Gen (PrimState m) -> m Int32 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int64, Int64) -> Gen (PrimState m) -> m Int64 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word, Word) -> Gen (PrimState m) -> m Word #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word8, Word8) -> Gen (PrimState m) -> m Word8 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word16,Word16) -> Gen (PrimState m) -> m Word16 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word32,Word32) -> Gen (PrimState m) -> m Word32 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word64,Word64) -> Gen (PrimState m) -> m Word64 #-}
-- | Generate a vector of pseudo-random variates. This is not
-- necessarily faster than invoking 'uniform' repeatedly in a loop,
View
48 benchmarks/Benchmark.hs
@@ -12,18 +12,42 @@ main = do
mtg <- M.newMTGen . Just =<< uniform mwc
defaultMain
[ bgroup "mwc"
- [ bench "Double" (uniform mwc :: IO Double)
- , bench "Int" (uniform mwc :: IO Int)
- , bench "Int8" (uniform mwc :: IO Int8)
- , bench "Int16" (uniform mwc :: IO Int16)
- , bench "Int32" (uniform mwc :: IO Int32)
- , bench "Int64" (uniform mwc :: IO Int64)
- , bench "Word" (uniform mwc :: IO Word)
- , bench "Word8" (uniform mwc :: IO Word8)
- , bench "Word16" (uniform mwc :: IO Word16)
- , bench "Word32" (uniform mwc :: IO Word32)
- , bench "Word64" (uniform mwc :: IO Word64)
- , bench "normal" (normal mwc :: IO Double)
+ -- One letter group names are used so they will fit on the plot.
+ --
+ -- U - uniform
+ -- R - uniformR
+ -- D - distribution
+ [ bgroup "U"
+ [ bench "Double" (uniform mwc :: IO Double)
+ , bench "Int" (uniform mwc :: IO Int)
+ , bench "Int8" (uniform mwc :: IO Int8)
+ , bench "Int16" (uniform mwc :: IO Int16)
+ , bench "Int32" (uniform mwc :: IO Int32)
+ , bench "Int64" (uniform mwc :: IO Int64)
+ , bench "Word" (uniform mwc :: IO Word)
+ , bench "Word8" (uniform mwc :: IO Word8)
+ , bench "Word16" (uniform mwc :: IO Word16)
+ , bench "Word32" (uniform mwc :: IO Word32)
+ , bench "Word64" (uniform mwc :: IO Word64)
+ ]
+ , bgroup "R"
+ -- I'm not entirely convinced that this is right way to test
+ -- uniformR. /A.Khudyakov/
+ [ bench "Double" (uniformR (-3.21,26) mwc :: IO Double)
+ , bench "Int" (uniformR (-12,679) mwc :: IO Int)
+ , bench "Int8" (uniformR (-12,4) mwc :: IO Int8)
+ , bench "Int16" (uniformR (-12,679) mwc :: IO Int16)
+ , bench "Int32" (uniformR (-12,679) mwc :: IO Int32)
+ , bench "Int64" (uniformR (-12,679) mwc :: IO Int64)
+ , bench "Word" (uniformR (34,633) mwc :: IO Word)
+ , bench "Word8" (uniformR (34,63) mwc :: IO Word8)
+ , bench "Word16" (uniformR (34,633) mwc :: IO Word16)
+ , bench "Word32" (uniformR (34,633) mwc :: IO Word32)
+ , bench "Word64" (uniformR (34,633) mwc :: IO Word64)
+ ]
+ , bgroup "D"
+ [ bench "normal" (normal mwc :: IO Double)
+ ]
]
, bgroup "random"
[
Something went wrong with that request. Please try again.