Merge d1d847a into 027a073

bashtage · May 26, 2019 · f749835
2 parents 027a073 + d1d847a
commit f749835
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 95 deletions.
diff --git a/benchmark.py b/benchmark.py
@@ -27,8 +27,8 @@
 else:
     scale_64 = 2
 
-PRNGS = ['DSFMT', 'PCG64', 'PCG32', 'MT19937', 'Xoroshiro128', 'Xorshift1024',
-         'Xoshiro256', 'Xoshiro512', 'Philox', 'ThreeFry',
+PRNGS = ['DSFMT', 'PCG64', 'PCG32', 'MT19937', 'MT64', 'Xoroshiro128',
+         'Xorshift1024', 'Xoshiro256', 'Xoshiro512', 'Philox', 'ThreeFry',
          'ThreeFry32', 'numpy']
 
 
@@ -109,9 +109,11 @@ def timer_bounded(bits=8, max=95, use_masked=True):
     minimum = 0
 
     if use_masked:  # Use masking & rejection.
-        command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits}, use_masked=True)'
+        command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits},' \
+                  ' use_masked=True)'
     else:  # Use Lemire's algo.
-        command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits}, use_masked=False)'
+        command = 'rg.randint({min}, {max}+1, 1000000, dtype=np.uint{bits},' \
+                  ' use_masked=False)'
 
     command = command.format(min=minimum, max=max, bits=bits)
 
@@ -120,7 +122,8 @@ def timer_bounded(bits=8, max=95, use_masked=True):
 
     run_timer(command, command_numpy, SETUP,
               '{bits}-bit bounded unsigned integers (max={max}, '
-              'use_masked={use_masked})'.format(max=max, use_masked=use_masked, bits=bits))
+              'use_masked={use_masked})'.format(max=max, use_masked=use_masked,
+                                                bits=bits))
 
 
 def timer_32bit():
@@ -152,9 +155,9 @@ def timer_normal_zig():
 
     parser = argparse.ArgumentParser()
     parser.add_argument('-f', '--full',
-                        help='Run benchmarks for a wide range of distributions.'
-                             ' If not provided, only tests the production of '
-                             'uniform values.',
+                        help='Run benchmarks for a wide range of '
+                             'distributions. If not provided, only tests the '
+                             'production of uniform values.',
                         dest='full', action='store_true')
     parser.add_argument('-bi', '--bounded-ints',
                         help='Included benchmark coverage of the bounded '
@@ -165,31 +168,39 @@ def timer_normal_zig():
     timer_uniform()
     if args.full:
         timer_raw()
+        timer_32bit()
+        timer_64bit()
+        timer_normal_zig()
+
         if args.bounded_ints:
             timer_bounded(use_masked=True)
-            timer_bounded(max=64, use_masked=False)  # Worst case for Numpy.
-            timer_bounded(max=95, use_masked=False)  # Typ. avrg. case for Numpy.
-            timer_bounded(max=127, use_masked=False)  # Best case for Numpy.
+            # Worst case for Numpy.
+            timer_bounded(max=64, use_masked=False)
+            # Typical average case for Numpy.
+            timer_bounded(max=95, use_masked=False)
+            # Best case for Numpy.
+            timer_bounded(max=127, use_masked=False)
 
             timer_bounded(16, use_masked=True)
-            timer_bounded(16, max=1024, use_masked=False)  # Worst case for Numpy.
-            timer_bounded(16, max=1535, use_masked=False)  # Typ. avrg. case for Numpy.
-            timer_bounded(16, max=2047, use_masked=False)  # Best case for Numpy.
-
-        timer_32bit()
+            # Worst case for Numpy.
+            timer_bounded(16, max=1024, use_masked=False)
+            # Typical average case for Numpy.
+            timer_bounded(16, max=1535, use_masked=False)
+            # Best case for Numpy.
+            timer_bounded(16, max=2047, use_masked=False)
 
-        if args.bounded_ints:
             timer_bounded(32, use_masked=True)
-            timer_bounded(32, max=1024, use_masked=False)  # Worst case for Numpy.
-            timer_bounded(32, max=1535, use_masked=False)  # Typ. avrg. case for Numpy.
-            timer_bounded(32, max=2047, use_masked=False)  # Best case for Numpy.
-
-        timer_64bit()
+            # Worst case for Numpy.
+            timer_bounded(32, max=1024, use_masked=False)
+            # Typical average case for Numpy.
+            timer_bounded(32, max=1535, use_masked=False)
+            # Best case for Numpy.
+            timer_bounded(32, max=2047, use_masked=False)
 
-        if args.bounded_ints:
             timer_bounded(64, use_masked=True)
-            timer_bounded(64, max=1024, use_masked=False)  # Worst case for Numpy.
-            timer_bounded(64, max=1535, use_masked=False)  # Typ. avrg. case for Numpy.
-            timer_bounded(64, max=2047, use_masked=False)  # Best case for Numpy.
-
-        timer_normal_zig()
+            # Worst case for Numpy.
+            timer_bounded(64, max=1024, use_masked=False)
+            # Typical average case for Numpy.
+            timer_bounded(64, max=1535, use_masked=False)
+            # Best case for Numpy.
+            timer_bounded(64, max=2047, use_masked=False)
diff --git a/doc/source/performance.py b/doc/source/performance.py
@@ -12,18 +12,17 @@
 SIZE = 25000
 PRNGS = [DSFMT, MT19937, MT64, Philox, PCG64, ThreeFry, Xoshiro256, Xoshiro512]
 
-funcs = {'32-bit Unsigned Ints':
-             f'integers(2**32, dtype="uint32", size={SIZE})',
-         '64-bit Unsigned Ints':
-             f'integers(2**64, dtype="uint64", size={SIZE})',
-         'Uniforms': f'random(size={SIZE})',
-         'Complex Normals': f'complex_normal(size={SIZE})',
-         'Normals': f'standard_normal(size={SIZE})',
-         'Exponentials': f'standard_exponential(size={SIZE})',
-         'Gammas': f'standard_gamma(3.0,size={SIZE})',
-         'Binomials': f'binomial(9, .1, size={SIZE})',
-         'Laplaces': f'laplace(size={SIZE})',
-         'Poissons': f'poisson(3.0, size={SIZE})', }
+funcs = OrderedDict()
+funcs['32-bit Unsigned Ints'] = f'integers(2**32, dtype="uint32", size={SIZE})'
+funcs['64-bit Unsigned Ints'] = f'integers(2**64, dtype="uint64", size={SIZE})'
+funcs['Uniform'] = f'random(size={SIZE})'
+funcs['Exponential'] = f'standard_exponential(size={SIZE})'
+funcs['Normal'] = f'standard_normal(size={SIZE})'
+funcs['Gamma'] = f'standard_gamma(3.0,size={SIZE})'
+funcs['Complex Normal'] = f'complex_normal(size={SIZE})'
+funcs['Binomial'] = f'binomial(9, .1, size={SIZE})'
+funcs['Laplace'] = f'laplace(size={SIZE})'
+funcs['Poisson'] = f'poisson(3.0, size={SIZE})'
 
 setup = """
 from randomgen import {prng}, Generator
@@ -73,11 +72,16 @@
 table = table.T
 table = table.reindex(order)
 table = table.T
-table.pop('Xoshiro512')
-table.pop('DSFMT')
 table = 1000000 * table / (SIZE * NUMBER)
 print(table.to_csv(float_format='%0.1f'))
 
+try:
+    from tabulate import tabulate
+
+    print(tabulate(table, headers='keys', tablefmt='psql'))
+except ImportError:
+    pass
+
 rel = table.loc[:, ['NumPy']].values @ np.ones((1, table.shape[1])) / table
 rel.pop('NumPy')
 rel = rel.T

diff --git a/randomgen/src/mt19937/mt19937.c b/randomgen/src/mt19937/mt19937.c
@@ -98,10 +98,10 @@ void mt19937_gen(mt19937_state *state) {
   state->pos = 0;
 }
 
-extern inline uint64_t mt19937_next64(mt19937_state *state);
+extern INLINE uint64_t mt19937_next64(mt19937_state *state);
 
-extern inline uint32_t mt19937_next32(mt19937_state *state);
+extern INLINE uint32_t mt19937_next32(mt19937_state *state);
 
-extern inline double mt19937_next_double(mt19937_state *state);
+extern INLINE double mt19937_next_double(mt19937_state *state);
 
 void mt19937_jump(mt19937_state *state) { mt19937_jump_state(state, poly); }
diff --git a/randomgen/src/mt19937/mt19937.h b/randomgen/src/mt19937/mt19937.h
@@ -1,17 +1,17 @@
 #pragma once
 #include <math.h>
+
 #ifdef _WIN32
 #if _MSC_VER == 1500
-#include "../common/stdint.h"
+#include "../common/inttypes.h"
+#define INLINE __forceinline
 #else
-#include <stdint.h>
+#include <inttypes.h>
+#define INLINE __inline __forceinline
 #endif
 #else
-#include <stdint.h>
-#endif
-
-#ifdef _WIN32
-#define inline __forceinline
+#include <inttypes.h>
+#define INLINE inline
 #endif
 
 #define RK_STATE_LEN 624
@@ -32,7 +32,7 @@ extern void mt19937_seed(mt19937_state *state, uint32_t seed);
 extern void mt19937_gen(mt19937_state *state);
 
 /* Slightly optimized reference implementation of the Mersenne Twister */
-static inline uint32_t mt19937_next(mt19937_state *state) {
+static INLINE uint32_t mt19937_next(mt19937_state *state) {
   uint32_t y;
 
   if (state->pos == RK_STATE_LEN) {
@@ -53,15 +53,15 @@ static inline uint32_t mt19937_next(mt19937_state *state) {
 extern void mt19937_init_by_array(mt19937_state *state, uint32_t *init_key,
                                   int key_length);
 
-static inline uint64_t mt19937_next64(mt19937_state *state) {
+static INLINE uint64_t mt19937_next64(mt19937_state *state) {
   return (uint64_t)mt19937_next(state) << 32 | mt19937_next(state);
 }
 
-static inline uint32_t mt19937_next32(mt19937_state *state) {
+static INLINE uint32_t mt19937_next32(mt19937_state *state) {
   return mt19937_next(state);
 }
 
-static inline double mt19937_next_double(mt19937_state *state) {
+static INLINE double mt19937_next_double(mt19937_state *state) {
   int32_t a = mt19937_next(state) >> 5, b = mt19937_next(state) >> 6;
   return (a * 67108864.0 + b) / 9007199254740992.0;
 }

diff --git a/randomgen/src/pcg64/pcg64.c b/randomgen/src/pcg64/pcg64.c
@@ -24,17 +24,17 @@
 
 #include "pcg64.h"
 
-extern inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng);
-extern inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state);
-extern inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng,
+extern INLINE void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng);
+extern INLINE uint64_t pcg_output_xsl_rr_128_64(pcg128_t state);
+extern INLINE void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng,
                                             pcg128_t initstate,
                                             pcg128_t initseq);
-extern inline uint64_t
+extern INLINE uint64_t
 pcg_setseq_128_xsl_rr_64_random_r(pcg_state_setseq_128 *rng);
-extern inline uint64_t
+extern INLINE uint64_t
 pcg_setseq_128_xsl_rr_64_boundedrand_r(pcg_state_setseq_128 *rng,
                                        uint64_t bound);
-extern inline void pcg_setseq_128_advance_r(pcg_state_setseq_128 *rng,
+extern INLINE void pcg_setseq_128_advance_r(pcg_state_setseq_128 *rng,
                                             pcg128_t delta);
 
 /* Multi-step advance functions (jump-ahead, jump-back)
@@ -89,8 +89,8 @@ pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, pcg128_t cur_mult,
 
 #endif
 
-extern inline uint64_t pcg64_next64(pcg64_state *state);
-extern inline uint32_t pcg64_next32(pcg64_state *state);
+extern INLINE uint64_t pcg64_next64(pcg64_state *state);
+extern INLINE uint32_t pcg64_next32(pcg64_state *state);
 
 extern void pcg64_advance(pcg64_state *state, uint64_t *step) {
   pcg128_t delta;