Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Added some very minor comments and a simple benchmarking script.

Using this script I saw a 13X regression in stdGen in recent revisions
vs the 1.0.0.3 version, dropping to only 900K random ints/sec on my
machine.  However, it was some kind of heisenbug because now it's gone
away for unknown reasons.  Watch out for it in the future though.
  • Loading branch information...
commit e7f72c84cb7dec28a417ce24d085d31e9d97160d 1 parent 130e421
Ryan Newton rrnewton authored
133 Benchmark/BinSearch.hs
View
@@ -0,0 +1,133 @@
+
+-- This is a script used for timing the throughput of benchmarks that
+-- take one argument and have linear complexity.
+
+module Benchmark.BinSearch
+ (
+ binSearch
+ )
+where
+
+import Control.Monad
+import Data.Time.Clock -- Not in 6.10
+import Data.List
+import Data.IORef
+import System
+import System.IO
+import System.Cmd
+import System.Exit
+import Debug.Trace
+
+-- In seconds:
+--desired_exec_length = 3
+
+
+
+-- | Binary search for the number of inputs to a computation that
+-- | makes it take a specified time in seconds.
+--
+-- > binSearch verbose N (min,max) kernel
+--
+-- | binSearch will find the right input size that results in a time
+-- | between min and max, then it will then run for N trials and
+-- | return the median (input,time-in-seconds) pair.
+binSearch :: Bool -> Integer -> (Double,Double) -> (Integer -> IO ()) -> IO (Integer, Double)
+binSearch verbose trials (min,max) kernel =
+ do
+ when(verbose)$ putStrLn$ "[binsearch] Binary search for input size resulting in time in range "++ show (min,max)
+
+ let desired_exec_length = 1.0
+ good_trial t = (toRational t <= toRational max) && (toRational t >= toRational min)
+
+ --loop :: Bool -> [String] -> Int -> Integer -> IO ()
+
+ -- At some point we must give up...
+ loop n | n > (2 ^ 100) = error "ERROR binSearch: This function doesn't seem to scale in proportion to its last argument."
+
+ -- Not allowed to have "0" size input, bump it back to one:
+ loop 0 = loop 1
+
+ loop n =
+ do
+ when(verbose)$ putStr$ "[binsearch:"++ show n ++ "] "
+ -- hFlush stdout
+
+ time <- timeit$ kernel n
+
+ when(verbose)$ putStrLn$ "Time consumed: "++ show time
+ -- hFlush stdout
+ let rate = fromIntegral n / time
+
+ -- [2010.06.09] Introducing a small fudge factor to help our guess get over the line:
+ let initial_fudge_factor = 1.10
+ fudge_factor = 1.01 -- Even in the steady state we fudge a little
+ guess = desired_exec_length * rate
+
+ -- TODO: We should keep more history here so that we don't re-explore input space we have already explored.
+ -- This is a balancing act because of randomness in execution time.
+
+ if good_trial time
+ then do
+ when(verbose)$ putStrLn$ "[binsearch] Time in range. LOCKING input size and performing remaining trials."
+ print_trial 1 n time
+ lockin (trials-1) n [time]
+
+ -- Here we're still in the doubling phase:
+ else if time < 0.100
+ then loop (2*n)
+
+ else do when(verbose)$ putStrLn$ "[binsearch] Estimated rate to be "++show (round$ rate)++" per second. Trying to scale up..."
+
+ -- Here we've exited the doubling phase, but we're making our first guess as to how big a real execution should be:
+ if time > 0.100 && time < 0.33 * desired_exec_length
+ then do when(verbose)$ putStrLn$ "[binsearch] (Fudging first guess a little bit extra)"
+ loop (round$ guess * initial_fudge_factor)
+ else loop (round$ guess * fudge_factor)
+
+ -- Termination condition: Done with all trials.
+ lockin 0 n log = do when(verbose)$ putStrLn$ "[binsearch] Time-per-unit for all trials: "++
+ (concat $ intersperse " " (map (show . (/ toDouble n) . toDouble) $ sort log))
+ return (n, log !! ((length log) `quot` 2)) -- Take the median
+
+ lockin trials_left n log =
+ do when(verbose)$ putStrLn$ "[binsearch]------------------------------------------------------------"
+ time <- timeit$ kernel n
+ -- hFlush stdout
+ print_trial (trials - trials_left +1 ) n time
+ -- when(verbose)$ hFlush stdout
+ lockin (trials_left - 1) n (time : log)
+
+ print_trial trialnum n time =
+ let rate = fromIntegral n / time
+ timeperunit = time / fromIntegral n
+ in
+ when(verbose)$ putStrLn$ "[binsearch] TRIAL: "++show trialnum ++
+ " secPerUnit: "++ showTime timeperunit ++
+ " ratePerSec: "++ show (rate) ++
+ " seconds: "++showTime time
+
+
+
+ (n,t) <- loop 1
+ return (n, fromRational$ toRational t)
+
+showTime t = show ((fromRational $ toRational t) :: Double)
+toDouble :: Real a => a -> Double
+toDouble = fromRational . toRational
+
+
+-- Could use cycle counters here.... but the point of this is to time
+-- things on the order of a second.
+timeit io =
+ do strt <- getCurrentTime
+ io
+ end <- getCurrentTime
+ return (diffUTCTime end strt)
+
+test =
+ binSearch True 3 (1.0, 1.05)
+ (\n ->
+ do v <- newIORef 0
+ forM_ [1..n] $ \i -> do
+ old <- readIORef v
+ writeIORef v (old+i))
289 Benchmark/SimpleRNGBench.hs
View
@@ -0,0 +1,289 @@
+{-# LANGUAGE BangPatterns, ScopedTypeVariables, ForeignFunctionInterface #-}
+{-# OPTIONS_GHC -fwarn-unused-imports #-}
+
+-- | A simple script to do some very basic timing of the RNGs.
+
+-- It is important that we also run established stastical tests on
+-- these RNGs a some point...
+
+module Main where
+
+import qualified Codec.Encryption.BurtonRNGSlow as BS
+
+-- --import qualified Codec.Crypto.IntelAES.GladmanAES as GA
+-- import qualified Codec.Crypto.GladmanAES as GA
+-- import qualified Codec.Crypto.IntelAES.AESNI as NI
+-- import qualified Codec.Crypto.IntelAES as IA
+-- import qualified Codec.Crypto.ConvertRNG as CR
+-- -- import qualified Codec.Crypto.AES.Random as Svein
+
+import System.Exit (exitSuccess, exitFailure)
+import System.Environment
+import System.Random
+-- import System.PosixCompat (sleep)
+import System.Posix (sleep)
+import System.CPUTime (getCPUTime)
+-- import Data.Time.Clock (diffUTCTime)
+import System.CPUTime.Rdtsc
+import System.Console.GetOpt
+
+import GHC.Conc
+import Control.Concurrent
+import Control.Monad
+import Control.Concurrent.Chan
+import Control.Exception
+
+-- import Crypto.Random (CryptoRandomGen(..))
+
+import Data.IORef
+import Data.List
+import Data.Int
+import Data.Word
+import Data.List.Split
+import Data.Serialize
+import qualified Data.ByteString as B
+import Text.Printf
+
+import Foreign.Ptr
+import Foreign.ForeignPtr
+import Foreign.Storable (peek,poke)
+
+import Benchmark.BinSearch
+
+----------------------------------------------------------------------------------------------------
+-- Miscellaneous helpers:
+
+-- I cannot *believe* there is not a standard call or an
+-- easily-findable hackage library supporting locale-based printing of
+-- numbers. [2011.01.28]
+commaint :: Integral a => a -> String
+commaint n =
+ reverse $
+ concat $
+ intersperse "," $
+ chunk 3 $
+ reverse (show n)
+
+padleft n str | length str >= n = str
+padleft n str | otherwise = take (n - length str) (repeat ' ') ++ str
+
+padright n str | length str >= n = str
+padright n str | otherwise = str ++ take (n - length str) (repeat ' ')
+
+fmt_num n = if n < 100
+ then printf "%.2f" n
+ else commaint (round n)
+
+-- This version simply busy-waits to stay on the same core:
+measure_freq2 :: IO Int64
+measure_freq2 = do
+ let second = 1000 * 1000 * 1000 * 1000 -- picoseconds are annoying
+ t1 <- rdtsc
+ start <- getCPUTime
+ let loop !n !last =
+ do t2 <- rdtsc
+ when (t2 < last) $
+ putStrLn$ "COUNTERS WRAPPED "++ show (last,t2)
+ cput <- getCPUTime
+ if (cput - start < second)
+ then loop (n+1) t2
+ else return (n,t2)
+ (n,t2) <- loop 0 t1
+ putStrLn$ " Approx getCPUTime calls per second: "++ commaint n
+ when (t2 < t1) $
+ putStrLn$ "WARNING: rdtsc not monotonically increasing, first "++show t1++" then "++show t2++" on the same OS thread"
+
+ return$ fromIntegral (t2 - t1)
+
+----------------------------------------------------------------------------------------------------
+-- Drivers to get random numbers repeatedly.
+
+incr !counter =
+ do -- modifyIORef counter (+1) -- Not strict enough!
+ -- Incrementing counter strictly (avoiding stack overflow) is annoying:
+ c <- readIORef counter
+ let c' = c+1
+ evaluate c'
+ writeIORef counter c'
+
+loop :: RandomGen g => IORef Int -> (Int,g) -> IO b
+loop !counter !(!n,!g) =
+ do incr counter
+ loop counter (next g)
+
+-- Test overheads without actually generating any random numbers:
+data NoopRNG = NoopRNG
+instance RandomGen NoopRNG where
+ next g = (0,g)
+#if 1
+ genRange _ = (0,0)
+instance SplittableGen NoopRNG where
+#endif
+ split g = (g,g)
+
+type Kern = Int -> Ptr Int -> IO ()
+
+-- [2011.01.28] Changing this to take "count" and "accumulator ptr" as arguments:
+-- foreign import ccall "cbits/c_test.c" blast_rands :: Kern
+-- foreign import ccall "cbits/c_test.c" store_loop :: Kern
+-- foreign import ccall unsafe "stdlib.hs" rand :: IO Int
+
+----------------------------------------------------------------------------------------------------
+-- Timing:
+
+timeit numthreads freq msg mkgen =
+ do
+ counters <- forM [1..numthreads] (const$ newIORef 1)
+ tids <- forM counters $ \counter ->
+ forkIO $ loop counter (next$ mkgen 23852358661234)
+ threadDelay (1000*1000) -- One second
+ mapM_ killThread tids
+
+ finals <- mapM readIORef counters
+ let mean :: Double = fromIntegral (foldl1 (+) finals) / fromIntegral numthreads
+ cycles_per :: Double = fromIntegral freq / mean
+ print_result (round mean) msg cycles_per
+
+print_result total msg cycles_per =
+ putStrLn$ " "++ padleft 11 (commaint total) ++" random ints generated "++ padright 27 ("["++msg++"]") ++" ~ "
+ ++ fmt_num cycles_per ++" cycles/int"
+
+
+-- This function times a function on one or more threads. Rather than
+-- running a fixed number of iterations, this number does a binary
+-- search to find out how many iterations can be completed in a second.
+timeit2 :: Int -> Int64 -> String -> (Int -> Ptr Int -> IO ()) -> IO Int
+timeit2 numthreads freq msg ffn = do
+ ptr :: ForeignPtr Int <- mallocForeignPtr
+
+ let kern = if numthreads == 1
+ then ffn
+ else replicate_kernel numthreads ffn
+ wrapped n = withForeignPtr ptr (kern$ fromIntegral n)
+ (n,t) <- binSearch False 1 (1.0, 1.05) wrapped
+
+ -- ONLY if we're in multi-threaded mode do we then run again with
+ -- that input size on all threads:
+----------------------------------------
+-- NOTE, this approach is TOO SLOW. For workloads that take a massive
+-- parallel slowdown it doesn't make sense to use the same input size
+-- in serial and in parallel.
+-- DISABLING:
+{-
+ (n2,t2) <-
+ if numthreads > 1 then do
+ ptrs <- mapM (const mallocForeignPtr) [1..numthreads]
+ tmpchan <- newChan
+ putStrLn$ " [forking threads for multithreaded measurement, input size "++ show n++"]"
+ start <- getCPUTime
+ tids <- forM ptrs $ \ptr -> forkIO $
+ do withForeignPtr ptr (ffn$ fromIntegral n)
+ writeChan tmpchan ()
+ forM ptrs $ \_ -> readChan tmpchan
+ end <- getCPUTime
+ let t2 :: Double = fromIntegral (end-start) / 1000000000000.0
+ putStrLn$ " [joined threads, time "++ show t2 ++"]"
+ return (n * fromIntegral numthreads, t2)
+ else do
+ return (n,t)
+-}
+----------------------------------------
+
+ let total_per_second = round $ fromIntegral n * (1 / t)
+ cycles_per = fromIntegral freq * t / fromIntegral n
+ print_result total_per_second msg cycles_per
+ return total_per_second
+
+-- This lifts the C kernel to operate
+replicate_kernel :: Int -> Kern -> Kern
+replicate_kernel numthreads kern n ptr = do
+ ptrs <- forM [1..numthreads]
+ (const mallocForeignPtr)
+ tmpchan <- newChan
+ -- let childwork = ceiling$ fromIntegral n / fromIntegral numthreads
+ let childwork = n -- Keep it the same.. interested in per-thread throughput.
+ -- Fork/join pattern:
+ tids <- forM ptrs $ \ptr -> forkIO $
+ withForeignPtr ptr $ \p -> do
+ kern (fromIntegral childwork) p
+ result <- peek p
+ writeChan tmpchan result
+
+ results <- forM [1..numthreads] $ \_ ->
+ readChan tmpchan
+ -- Meaningless semantics here... sum the child ptrs and write to the input one:
+ poke ptr (foldl1 (+) results)
+ return ()
+
+----------------------------------------------------------------------------------------------------
+-- Main Script
+
+data Flag = NoC | Help | Test
+ deriving (Show, Eq)
+
+options =
+ [ Option ['h'] ["help"] (NoArg Help) "print program help"
+ , Option [] ["noC"] (NoArg NoC) "omit C benchmarks, haskell only"
+ , Option ['t'] ["test"] (NoArg Test) "run some basic tests"
+ ]
+
+
+main = do
+ argv <- getArgs
+ let (opts,_,other) = getOpt Permute options argv
+
+ -- when (Test `elem` opts)$ do
+ -- IA.testIntelAES
+ -- NI.testAESNI
+ -- exitSuccess
+
+ when (not$ null other) $ do
+ putStrLn$ "ERROR: Unrecognized options: "
+ mapM_ putStr other
+ exitFailure
+
+ when (Help `elem` opts) $ do
+ putStr$ usageInfo "Benchmark random number generation" options
+ exitSuccess
+
+ putStrLn$ "\nHow many random numbers can we generate in a second on one thread?"
+
+ t1 <- rdtsc
+ t2 <- rdtsc
+ putStrLn (" Cost of rdtsc (ffi call): " ++ show (t2 - t1))
+
+ freq <- measure_freq2
+ putStrLn$ " Approx clock frequency: " ++ commaint freq
+
+ let gamut th = do
+ putStrLn$ " First, timing with System.Random interface:"
+ timeit th freq "constant zero gen" (const NoopRNG)
+ timeit th freq "System.Random stdGen" mkStdGen
+ -- timeit th freq "PureHaskell/reference" BS.mkBurtonGen_reference
+ -- timeit th freq "PureHaskell" BS.mkBurtonGen
+ -- timeit th freq "Gladman inefficient" mkAESGen_gladman0
+ -- timeit th freq "Gladman" mkAESGen_gladman
+ -- timeit th freq "Compound gladman/intel" IA.mkAESGen
+
+ -- if IA.supportsAESNI then do
+ -- timeit th freq "IntelAES inefficient" NI.mkAESGen0
+ -- timeit th freq "IntelAES" NI.mkAESGen
+ -- else
+ -- putStrLn$ " [Skipping AESNI-only tests, current machine does not support these instructions.]"
+
+-- when (not$ NoC `elem` opts) $ do
+-- putStrLn$ " Comparison to C's rand():"
+-- timeit2 th freq "ptr store in C loop" store_loop
+-- timeit2 th freq "rand/store in C loop" blast_rands
+-- timeit2 th freq "rand in Haskell loop" (\n ptr -> forM_ [1..n]$ \_ -> rand )
+-- timeit2 th freq "rand/store in Haskell loop" (\n ptr -> forM_ [1..n]$ \_ -> do n <- rand; poke ptr n )
+-- return ()
+
+ -- Test with 1 thread and numCapabilities threads:
+ gamut 1
+ when (numCapabilities > 1) $ do
+ putStrLn$ "\nNow "++ show numCapabilities ++" threads, reporting mean randoms-per-second-per-thread:"
+ gamut numCapabilities
+ return ()
+
+ putStrLn$ "Finished."
2  System/Random.hs
View
@@ -368,6 +368,7 @@ mkStdRNG o = do
randomBounded :: (RandomGen g, Random a, Bounded a) => g -> (a, g)
randomBounded = randomR (minBound, maxBound)
+-- The two integer functions below take an [inclusive,inclusive] range.
randomIvalIntegral :: (RandomGen g, Integral a) => (a, a) -> g -> (a, g)
randomIvalIntegral (l,h) = randomIvalInteger (toInteger l, toInteger h)
@@ -387,6 +388,7 @@ randomIvalInteger (l,h) rng
in
f (n' - 1) (fromIntegral x + acc * b) g'
+-- The continuous functions on the other hand take an [inclusive,exclusive) range.
randomFrac :: (RandomGen g, Fractional a) => g -> (a, g)
randomFrac = randomIvalDouble (0::Double,1) realToFrac
2  tests/random1283.hs
View
@@ -17,6 +17,7 @@ testRace t s = do
iss <- threadRandoms t s
return (isInterleavingOf (ref::[Int]) iss)
+threadRandoms :: Random a => Int -> Int -> IO [[a]]
threadRandoms t s = do
vs <- sequence $ replicate t $ do
v <- newEmptyMVar
@@ -34,3 +35,4 @@ isInterleavingOf xs yss = iio xs (viewl $ fromList yss) EmptyL where
fromViewL (EmptyL) = empty
fromViewL (x :< xs) = x <| xs
+
Please sign in to comment.
Something went wrong with that request. Please try again.