Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Drastic performance improvment for uniformR #6

Merged
merged 3 commits into from

2 participants

Aleksey Khudyakov Bryan O'Sullivan
Aleksey Khudyakov
Collaborator
20-40x performance improvements for uniformR for integral times

It looks like GHC fails to specialize uniformRange despite INLINE
pragma. This results in horrible 20-40x slowdown. Adding
SPECIALIZE pragma fixes that

Also accept ranges (a,b) where a<b for integral types. It doesn't produce any
measurable slowdown.

Benchmarks are added as well
Shimuuar and others added some commits
Aleksey Khudyakov Shimuuar Add uniformR to benchmarks 43ca7d7
Aleksey 20-40x performance improvements for uniformR for integral times
It looks like GHC fails to specialize uniformRange despite INLINE
pragma. This results in horrible 20-40x slowdown. Adding
SPECIALIZE pragma fixes that

Also accept ranges (a,b) where a<b. It doesn't produce any
measurable slowdown.

fixes #4.
d7fda63
Aleksey Khudyakov Shimuuar Fix uniformRange for case where (x1 > x2) 3909677
Aleksey Khudyakov
Collaborator

Ping?

Bryan O'Sullivan bos merged commit 1c1f03a into from
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Sep 28, 2011
  1. Aleksey Khudyakov

    Add uniformR to benchmarks

    Shimuuar authored
Commits on Sep 29, 2011
  1. Aleksey Khudyakov

    20-40x performance improvements for uniformR for integral times

    Aleksey authored Shimuuar committed
    It looks like GHC fails to specialize uniformRange despite INLINE
    pragma. This results in horrible 20-40x slowdown. Adding
    SPECIALIZE pragma fixes that
    
    Also accept ranges (a,b) where a<b. It doesn't produce any
    measurable slowdown.
    
    fixes #4.
Commits on Sep 30, 2011
  1. Aleksey Khudyakov
This page is out of date. Refresh to see the latest.
Showing with 62 additions and 27 deletions.
  1. +26 −15 System/Random/MWC.hs
  2. +36 −12 benchmarks/Benchmark.hs
41 System/Random/MWC.hs
View
@@ -478,21 +478,11 @@ type instance Unsigned Word = Word
-- unsigned data type of same size
sub :: (Integral a, Integral (Unsigned a)) => a -> a -> Unsigned a
sub x y = fromIntegral x - fromIntegral y
+{-# INLINE sub #-}
add :: (Integral a, Integral (Unsigned a)) => a -> Unsigned a -> a
add m x = m + fromIntegral x
-
--- Generate uniform value in the range [0,n). Values must be
--- unsigned. Second parameter is random number generator
-unsignedRange :: (PrimMonad m, Integral a, Bounded a) => a -> m a -> m a
-unsignedRange n rnd = go
- where
- buckets = maxBound `div` n
- maxN = buckets * n
- go = do x <- rnd
- if x < maxN then return (x `div` buckets)
- else go
-{-# INLINE unsignedRange #-}
+{-# INLINE add #-}
-- Generate unformly distributed value in inclusive range.
uniformRange :: ( PrimMonad m
@@ -500,10 +490,31 @@ uniformRange :: ( PrimMonad m
, Integral (Unsigned a), Bounded (Unsigned a), Variate (Unsigned a))
=> (a,a) -> Gen (PrimState m) -> m a
uniformRange (x1,x2) g
- | x1 == minBound && x2 == maxBound = uniform g
- | otherwise = do x <- unsignedRange (sub x2 x1 + 1) (uniform g)
- return $! add x1 x
+ | n == 0 = uniform g -- Abuse overflow in unsigned types
+ | otherwise = loop
+ where
+ -- Allow ranges where x2<x1
+ (# a, b #) | x1 < x2 = (# x1, x2 #)
+ | otherwise = (# x2, x1 #)
+ n = 1 + sub b a
+ buckets = maxBound `div` n
+ maxN = buckets * n
+ loop = do x <- uniform g
+ if x < maxN then return $! add a (x `div` buckets)
+ else loop
{-# INLINE uniformRange #-}
+-- These SPECIALIZE pragmas are crucial for performance. Without them
+-- generic version is used which is 20-40 times slower.
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int, Int) -> Gen (PrimState m) -> m Int #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int8, Int8) -> Gen (PrimState m) -> m Int8 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int16, Int16) -> Gen (PrimState m) -> m Int16 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int32, Int32) -> Gen (PrimState m) -> m Int32 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Int64, Int64) -> Gen (PrimState m) -> m Int64 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word, Word) -> Gen (PrimState m) -> m Word #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word8, Word8) -> Gen (PrimState m) -> m Word8 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word16,Word16) -> Gen (PrimState m) -> m Word16 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word32,Word32) -> Gen (PrimState m) -> m Word32 #-}
+{-# SPECIALIZE uniformRange :: (PrimMonad m) => (Word64,Word64) -> Gen (PrimState m) -> m Word64 #-}
-- | Generate a vector of pseudo-random variates. This is not
-- necessarily faster than invoking 'uniform' repeatedly in a loop,
48 benchmarks/Benchmark.hs
View
@@ -12,18 +12,42 @@ main = do
mtg <- M.newMTGen . Just =<< uniform mwc
defaultMain
[ bgroup "mwc"
- [ bench "Double" (uniform mwc :: IO Double)
- , bench "Int" (uniform mwc :: IO Int)
- , bench "Int8" (uniform mwc :: IO Int8)
- , bench "Int16" (uniform mwc :: IO Int16)
- , bench "Int32" (uniform mwc :: IO Int32)
- , bench "Int64" (uniform mwc :: IO Int64)
- , bench "Word" (uniform mwc :: IO Word)
- , bench "Word8" (uniform mwc :: IO Word8)
- , bench "Word16" (uniform mwc :: IO Word16)
- , bench "Word32" (uniform mwc :: IO Word32)
- , bench "Word64" (uniform mwc :: IO Word64)
- , bench "normal" (normal mwc :: IO Double)
+ -- One letter group names are used so they will fit on the plot.
+ --
+ -- U - uniform
+ -- R - uniformR
+ -- D - distribution
+ [ bgroup "U"
+ [ bench "Double" (uniform mwc :: IO Double)
+ , bench "Int" (uniform mwc :: IO Int)
+ , bench "Int8" (uniform mwc :: IO Int8)
+ , bench "Int16" (uniform mwc :: IO Int16)
+ , bench "Int32" (uniform mwc :: IO Int32)
+ , bench "Int64" (uniform mwc :: IO Int64)
+ , bench "Word" (uniform mwc :: IO Word)
+ , bench "Word8" (uniform mwc :: IO Word8)
+ , bench "Word16" (uniform mwc :: IO Word16)
+ , bench "Word32" (uniform mwc :: IO Word32)
+ , bench "Word64" (uniform mwc :: IO Word64)
+ ]
+ , bgroup "R"
+ -- I'm not entirely convinced that this is right way to test
+ -- uniformR. /A.Khudyakov/
+ [ bench "Double" (uniformR (-3.21,26) mwc :: IO Double)
+ , bench "Int" (uniformR (-12,679) mwc :: IO Int)
+ , bench "Int8" (uniformR (-12,4) mwc :: IO Int8)
+ , bench "Int16" (uniformR (-12,679) mwc :: IO Int16)
+ , bench "Int32" (uniformR (-12,679) mwc :: IO Int32)
+ , bench "Int64" (uniformR (-12,679) mwc :: IO Int64)
+ , bench "Word" (uniformR (34,633) mwc :: IO Word)
+ , bench "Word8" (uniformR (34,63) mwc :: IO Word8)
+ , bench "Word16" (uniformR (34,633) mwc :: IO Word16)
+ , bench "Word32" (uniformR (34,633) mwc :: IO Word32)
+ , bench "Word64" (uniformR (34,633) mwc :: IO Word64)
+ ]
+ , bgroup "D"
+ [ bench "normal" (normal mwc :: IO Double)
+ ]
]
, bgroup "random"
[
Something went wrong with that request. Please try again.