Skip to content

Commit

Permalink
Merge 416c52f into 4308cf7
Browse files Browse the repository at this point in the history
  • Loading branch information
bashtage committed Apr 2, 2019
2 parents 4308cf7 + 416c52f commit 6790768
Show file tree
Hide file tree
Showing 7 changed files with 2,117 additions and 2,067 deletions.
6 changes: 4 additions & 2 deletions randomgen/src/xoroshiro128/xoroshiro128-benchmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,16 @@

#define N 1000000000

int main() {
int main()
{
uint64_t count = 0, sum = 0;
uint64_t seed = 0xDEADBEAF;
s[0] = splitmix64_next(&seed);
s[1] = splitmix64_next(&seed);
int i;
clock_t begin = clock();
for (i = 0; i < N; i++) {
for (i = 0; i < N; i++)
{
sum += next();
count++;
}
Expand Down
33 changes: 22 additions & 11 deletions randomgen/src/xoroshiro128/xoroshiro128-test-data-gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,50 +21,61 @@

#define N 1000

int main() {
int main()
{
uint64_t sum = 0;
uint64_t state, seed = 0xDEADBEAF;
state = seed;
int i;
for (i = 0; i < 2; i++) {
for (i = 0; i < 2; i++)
{
s[i] = splitmix64_next(&state);
}
uint64_t store[N];
for (i = 0; i < N; i++) {
for (i = 0; i < N; i++)
{
store[i] = next();
}

FILE *fp;
fp = fopen("xoroshiro128-testset-1.csv", "w");
if (fp == NULL) {
if (fp == NULL)
{
printf("Couldn't open file\n");
return -1;
}
fprintf(fp, "seed, 0x%" PRIx64 "\n", seed);
for (i = 0; i < N; i++) {
for (i = 0; i < N; i++)
{
fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]);
if (i == 999) {
if (i == 999)
{
printf("%d, 0x%" PRIx64 "\n", i, store[i]);
}
}
fclose(fp);

seed = state = 0;
for (i = 0; i < 2; i++) {
for (i = 0; i < 2; i++)
{
s[i] = splitmix64_next(&state);
}
for (i = 0; i < N; i++) {
for (i = 0; i < N; i++)
{
store[i] = next();
}
fp = fopen("xoroshiro128-testset-2.csv", "w");
if (fp == NULL) {
if (fp == NULL)
{
printf("Couldn't open file\n");
return -1;
}
fprintf(fp, "seed, 0x%" PRIx64 "\n", seed);
for (i = 0; i < N; i++) {
for (i = 0; i < N; i++)
{
fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]);
if (i == 999) {
if (i == 999)
{
printf("%d, 0x%" PRIx64 "\n", i, store[i]);
}
}
Expand Down
46 changes: 24 additions & 22 deletions randomgen/src/xoroshiro128/xoroshiro128.c
Original file line number Diff line number Diff line change
@@ -1,52 +1,54 @@
/* Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org)
/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
To the extent possible under law, the author has dedicated all copyright
and related and neighboring rights to this software to the public domain
worldwide. This software is distributed without any warranty.
See <http://creativecommons.org/publicdomain/zero/1.0/>. */

/* This is the successor to xorshift128+. It is the fastest full-period
generator passing BigCrush without systematic failures, but due to the
relatively short period it is acceptable only for applications with a
mild amount of parallelism; otherwise, use a xorshift1024* generator.
Besides passing BigCrush, this generator passes the PractRand test suite
up to (and including) 16TB, with the exception of binary rank tests, as
the lowest bit of this generator is an LFSR of degree 128. The next bit
can be described by an LFSR of degree 8256, but in the long run it will
fail linearity tests, too. The other bits need a much higher degree to
be represented as LFSRs.
/* This is xoroshiro128+ 1.0, our best and fastest small-state generator
for floating-point numbers. We suggest using its upper bits for
floating-point generation, as it is slightly faster than
xoroshiro128**. It passes all tests we are aware of except for the four
lower bits, which might fail linearity tests (and just those), so if
low linear complexity is not considered an issue (as is usually the
case) it can be used to generate 64-bit outputs, too; moreover, this
generator has a very mild Hamming-weight dependency making our test
(http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe
this slight bias cannot affect any application. If you are concerned,
use xoroshiro128** or xoshiro256+.
We suggest using a sign test to extract a random Boolean value, and
right shifts to extract subsets of bits.
Note that the generator uses a simulated rotate operation, which most C
compilers will turn into a single instruction. In Java, you can use
Long.rotateLeft(). In languages that do not make low-level rotation
instructions accessible xorshift128+ could be faster.
The state must be seeded so that it is not everywhere zero. If you have
a 64-bit seed, we suggest to seed a splitmix64 generator and use its
output to fill s. */
output to fill s.
NOTE: the parameters (a=24, b=16, c=37) of this version give slightly
better results in our test than the 2016 version (a=55, b=14, c=36).
*/

#include "xoroshiro128.h"

extern INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state);

extern INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state);

void xoroshiro128_jump(xoroshiro128_state *state) {
void xoroshiro128_jump(xoroshiro128_state *state)
{
int i, b;
uint64_t s0;
uint64_t s1;
static const uint64_t JUMP[] = {0xbeac0467eba5facb, 0xd86b048b86aa9922};
static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc};

s0 = 0;
s1 = 0;
for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
for (b = 0; b < 64; b++) {
if (JUMP[i] & UINT64_C(1) << b) {
for (b = 0; b < 64; b++)
{
if (JUMP[i] & UINT64_C(1) << b)
{
s0 ^= state->s[0];
s1 ^= state->s[1];
}
Expand Down
22 changes: 14 additions & 8 deletions randomgen/src/xoroshiro128/xoroshiro128.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,35 +14,41 @@
#define INLINE inline
#endif

typedef struct s_xoroshiro128_state {
typedef struct s_xoroshiro128_state
{
uint64_t s[2];
int has_uint32;
uint32_t uinteger;
} xoroshiro128_state;

static INLINE uint64_t rotl(const uint64_t x, int k) {
static INLINE uint64_t rotl(const uint64_t x, int k)
{
return (x << k) | (x >> (64 - k));
}

static INLINE uint64_t xoroshiro128_next(uint64_t *s) {
static INLINE uint64_t xoroshiro128_next(uint64_t *s)
{
const uint64_t s0 = s[0];
uint64_t s1 = s[1];
const uint64_t result = s0 + s1;

s1 ^= s0;
s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
s[1] = rotl(s1, 36); // c
s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
s[1] = rotl(s1, 37); // c

return result;
}

static INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state) {
static INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state)
{
return xoroshiro128_next(&state->s[0]);
}

static INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state) {
static INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state)
{
uint64_t next;
if (state->has_uint32) {
if (state->has_uint32)
{
state->has_uint32 = 0;
return state->uinteger;
}
Expand Down
81 changes: 55 additions & 26 deletions randomgen/src/xoroshiro128/xoroshiro128plus.orig.c
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
/* Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org)
/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
To the extent possible under law, the author has dedicated all copyright
and related and neighboring rights to this software to the public domain
Expand All @@ -8,44 +8,45 @@ See <http://creativecommons.org/publicdomain/zero/1.0/>. */

#include <stdint.h>

/* This is the successor to xorshift128+. It is the fastest full-period
generator passing BigCrush without systematic failures, but due to the
relatively short period it is acceptable only for applications with a
mild amount of parallelism; otherwise, use a xorshift1024* generator.
Besides passing BigCrush, this generator passes the PractRand test suite
up to (and including) 16TB, with the exception of binary rank tests, as
the lowest bit of this generator is an LFSR of degree 128. The next bit
can be described by an LFSR of degree 8256, but in the long run it will
fail linearity tests, too. The other bits need a much higher degree to
be represented as LFSRs.
/* This is xoroshiro128+ 1.0, our best and fastest small-state generator
for floating-point numbers. We suggest using its upper bits for
floating-point generation, as it is slightly faster than
xoroshiro128**. It passes all tests we are aware of except for the four
lower bits, which might fail linearity tests (and just those), so if
low linear complexity is not considered an issue (as is usually the
case) it can be used to generate 64-bit outputs, too; moreover, this
generator has a very mild Hamming-weight dependency making our test
(http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe
this slight bias cannot affect any application. If you are concerned,
use xoroshiro128** or xoshiro256+.
We suggest using a sign test to extract a random Boolean value, and
right shifts to extract subsets of bits.
Note that the generator uses a simulated rotate operation, which most C
compilers will turn into a single instruction. In Java, you can use
Long.rotateLeft(). In languages that do not make low-level rotation
instructions accessible xorshift128+ could be faster.
The state must be seeded so that it is not everywhere zero. If you have
a 64-bit seed, we suggest to seed a splitmix64 generator and use its
output to fill s. */
output to fill s.
NOTE: the parameters (a=24, b=16, c=37) of this version give slightly
better results in our test than the 2016 version (a=55, b=14, c=36).
*/

uint64_t s[2];

static inline uint64_t rotl(const uint64_t x, int k) {
static inline uint64_t rotl(const uint64_t x, int k)
{
return (x << k) | (x >> (64 - k));
}

uint64_t next(void) {
uint64_t next(void)
{
const uint64_t s0 = s[0];
uint64_t s1 = s[1];
const uint64_t result = s0 + s1;

s1 ^= s0;
s[0] = rotl(s0, 55) ^ s1 ^ (s1 << 14); // a, b
s[1] = rotl(s1, 36); // c
s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b
s[1] = rotl(s1, 37); // c

return result;
}
Expand All @@ -54,14 +55,42 @@ uint64_t next(void) {
to 2^64 calls to next(); it can be used to generate 2^64
non-overlapping subsequences for parallel computations. */

void jump(void) {
static const uint64_t JUMP[] = {0xbeac0467eba5facb, 0xd86b048b86aa9922};
void jump(void)
{
static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc};

uint64_t s0 = 0;
uint64_t s1 = 0;
for (int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
for (int b = 0; b < 64; b++) {
if (JUMP[i] & UINT64_C(1) << b) {
for (int b = 0; b < 64; b++)
{
if (JUMP[i] & UINT64_C(1) << b)
{
s0 ^= s[0];
s1 ^= s[1];
}
next();
}
s[0] = s0;
s[1] = s1;
}

/* This is the long-jump function for the generator. It is equivalent to
2^96 calls to next(); it can be used to generate 2^32 starting points,
from each of which jump() will generate 2^32 non-overlapping
subsequences for parallel distributed computations. */

void long_jump(void)
{
static const uint64_t LONG_JUMP[] = {0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1};

uint64_t s0 = 0;
uint64_t s1 = 0;
for (int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++)
for (int b = 0; b < 64; b++)
{
if (LONG_JUMP[i] & UINT64_C(1) << b)
{
s0 ^= s[0];
s1 ^= s[1];
}
Expand Down

0 comments on commit 6790768

Please sign in to comment.