Skip to content
Permalink
Browse files
RNG-173: BaseProvider static method to extend an input array seed
  • Loading branch information
aherbert committed Mar 30, 2022
1 parent cc6b0c0 commit 34b22b32c9f854b94409f8981a14ccf13333c37f
Showing 4 changed files with 262 additions and 0 deletions.
@@ -17,6 +17,7 @@

package org.apache.commons.rng.core;

import java.util.Arrays;
import org.apache.commons.rng.RestorableUniformRandomProvider;
import org.apache.commons.rng.RandomProviderState;

@@ -29,6 +30,16 @@
private static final String NOT_POSITIVE = "Must be strictly positive: ";
/** 2^32. */
private static final long POW_32 = 1L << 32;
/**
* The fractional part of the the golden ratio, phi, scaled to 64-bits and rounded to odd.
* <pre>
* phi = (sqrt(5) - 1) / 2) * 2^64
* </pre>
* @see <a href="https://en.wikipedia.org/wiki/Golden_ratio">Golden ratio</a>
*/
private static final long GOLDEN_RATIO_64 = 0x9e3779b97f4a7c15L;
/** The fractional part of the the golden ratio, phi, scaled to 32-bits and rounded to odd. */
private static final int GOLDEN_RATIO_32 = 0x9e3779b9;

/** {@inheritDoc} */
@Override
@@ -320,4 +331,140 @@ private static long scrambleWell(long n,
// Code inspired from "AbstractWell" class.
return scramble(n, 1812433253L, 30, add);
}

/**
* Extend the seed to the specified minimum length. If the seed is equal or greater than the
* minimum length, return the same seed unchanged. Otherwise:
* <ol>
* <li>Create a new array of the specified length
* <li>Copy all elements of the seed into the array
* <li>Fill the remaining values. The additional values will have at most one occurrence
* of zero. If the original seed is all zero, the first extended value will be non-zero.
* </li>
* </ol>
*
* <p>This method can be used in constructors that must pass their seed to the super class
* to avoid a duplication of seed expansion to the minimum length required by the super class
* and the class:
* <pre>
* public RNG extends AnotherRNG {
* public RNG(long[] seed) {
* super(seed = extendSeed(seed, SEED_SIZE));
* // Use seed for additional state ...
* }
* }
* </pre>
*
* <p>Note using the state filling procedure provided in {@link #fillState(long[], long[])}
* is not possible as it is an instance method. Calling a seed extension routine must use a
* static method.
*
* <p>This method functions as if the seed has been extended using a
* {@link org.apache.commons.rng.core.source64.SplitMix64 SplitMix64}
* generator seeded with {@code seed[0]}, or zero if the input seed length is zero.
* <pre>
* if (seed.length &lt; length) {
* final long[] s = Arrays.copyOf(seed, length);
* final SplitMix64 rng = new SplitMix64(s[0]);
* for (int i = seed.length; i &lt; length; i++) {
* s[i] = rng.nextLong();
* }
* return s;
* }</pre>
*
* @param seed Input seed
* @param length The minimum length
* @return the seed
*/
protected static long[] extendSeed(long[] seed, int length) {
if (seed.length < length) {
final long[] s = Arrays.copyOf(seed, length);
// Fill the rest as if using a SplitMix64 RNG
long x = s[0];
for (int i = seed.length; i < length; i++) {
s[i] = stafford13(x += GOLDEN_RATIO_64);
}
return s;
}
return seed;
}

/**
* Extend the seed to the specified minimum length. If the seed is equal or greater than the
* minimum length, return the same seed unchanged. Otherwise:
* <ol>
* <li>Create a new array of the specified length
* <li>Copy all elements of the seed into the array
* <li>Fill the remaining values. The additional values will have at most one occurrence
* of zero. If the original seed is all zero, the first extended value will be non-zero.
* </li>
* </ol>
*
* <p>This method can be used in constructors that must pass their seed to the super class
* to avoid a duplication of seed expansion to the minimum length required by the super class
* and the class:
* <pre>
* public RNG extends AnotherRNG {
* public RNG(int[] seed) {
* super(seed = extendSeed(seed, SEED_SIZE));
* // Use seed for additional state ...
* }
* }
* </pre>
*
* <p>Note using the state filling procedure provided in {@link #fillState(int[], int[])}
* is not possible as it is an instance method. Calling a seed extension routine must use a
* static method.
*
* <p>This method functions as if the seed has been extended using a
* {@link org.apache.commons.rng.core.source64.SplitMix64 SplitMix64}-style 32-bit
* generator seeded with {@code seed[0]}, or zero if the input seed length is zero. The
* generator uses the 32-bit mixing function from MurmurHash3.
*
* @param seed Input seed
* @param length The minimum length
* @return the seed
*/
protected static int[] extendSeed(int[] seed, int length) {
if (seed.length < length) {
final int[] s = Arrays.copyOf(seed, length);
// Fill the rest as if using a SplitMix64-style RNG for 32-bit output
int x = s[0];
for (int i = seed.length; i < length; i++) {
s[i] = murmur3(x += GOLDEN_RATIO_32);
}
return s;
}
return seed;
}

/**
* Perform variant 13 of David Stafford's 64-bit mix function.
* This is the mix function used in the {@link SplitMix64} RNG.
*
* <p>This is ranked first of the top 14 Stafford mixers.
*
* @param x the input value
* @return the output value
* @see <a href="http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html">Better
* Bit Mixing - Improving on MurmurHash3&#39;s 64-bit Finalizer.</a>
*/
private static long stafford13(long x) {
x = (x ^ (x >>> 30)) * 0xbf58476d1ce4e5b9L;
x = (x ^ (x >>> 27)) * 0x94d049bb133111ebL;
return x ^ (x >>> 31);
}

/**
* Perform the finalising 32-bit mix function of Austin Appleby's MurmurHash3.
*
* @param x the input value
* @return the output value
* @see <a href="https://github.com/aappleby/smhasher">SMHasher</a>
*/
private static int murmur3(int x) {
x = (x ^ (x >>> 16)) * 0x85ebca6b;
x = (x ^ (x >>> 13)) * 0xc2b2ae35;
return x ^ (x >>> 16);
}
}
@@ -17,6 +17,13 @@
package org.apache.commons.rng.core;

import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

import java.util.Arrays;
import java.util.SplittableRandom;

import org.apache.commons.rng.core.source64.SplitMix64;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Assumptions;

@@ -127,6 +134,103 @@ public double nextDouble() {
Assertions.assertThrows(IndexOutOfBoundsException.class, () -> rng.checkIndex(-10, 5, Integer.MAX_VALUE));
}

/**
* Test a seed can be extended to a required size by filling with a SplitMix64 generator.
*/
@ParameterizedTest
@ValueSource(ints = {0, 1, 2, 4, 5, 6, 7, 8, 9})
void testExpnadSeedLong(int length) {
// The seed does not matter.
// Create random seeds that are smaller or larger than length.
final SplittableRandom rng = new SplittableRandom();
for (long[] seed : new long[][] {
{},
rng.longs(1).toArray(),
rng.longs(2).toArray(),
rng.longs(3).toArray(),
rng.longs(4).toArray(),
rng.longs(5).toArray(),
rng.longs(6).toArray(),
rng.longs(7).toArray(),
rng.longs(8).toArray(),
rng.longs(9).toArray(),
}) {
Assertions.assertArrayEquals(expandSeed(length, seed),
BaseProvider.extendSeed(seed, length));
}
}

/**
* Expand the seed to the minimum specified length using a {@link SplitMix64} generator
* seeded with {@code seed[0]}, or zero if the seed length is zero.
*
* @param length the length
* @param seed the seed
* @return the seed
*/
private static long[] expandSeed(int length, long... seed) {
if (seed.length < length) {
final long[] s = Arrays.copyOf(seed, length);
final SplitMix64 rng = new SplitMix64(s[0]);
for (int i = seed.length; i < length; i++) {
s[i] = rng.nextLong();
}
return s;
}
return seed;
}

/**
* Test a seed can be extended to a required size by filling with a SplitMix64-style
* generator using MurmurHash3's 32-bit mix function.
*
* <p>There is no reference RNG for this output. The test uses fixed output computed
* from the reference c++ function in smhasher.
*/
@ParameterizedTest
@ValueSource(ints = {0, 1, 2, 4, 5, 6, 7, 8, 9})
void testExpandSeedInt(int length) {
// Reference output from the c++ function fmix32(uint32_t) in smhasher.
// https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
final int seedA = 0x012de1ba;
final int[] valuesA = {
0x2f66c8b6, 0x256c0269, 0x054ef409, 0x402425ba, 0x78ebf590, 0x76bea1db,
0x8bf5dcbe, 0x104ecdd4, 0x43cfc87e, 0xa33c7643, 0x4d210f56, 0xfa12093d,
};
// Values from a seed of zero
final int[] values0 = {
0x92ca2f0e, 0x3cd6e3f3, 0x1b147dcc, 0x4c081dbf, 0x487981ab, 0xdb408c9d,
0x78bc1b8f, 0xd83072e5, 0x65cbdd54, 0x1f4b8cef, 0x91783bb0, 0x0231739b,
};

// Create a random seed that is larger than the maximum length;
// start with the initial value
final int[] data = new SplittableRandom().ints(10).toArray();
data[0] = seedA;

for (int i = 0; i <= 9; i++) {
final int seedLength = i;
// Truncate the random seed
final int[] seed = Arrays.copyOf(data, seedLength);
// Create the expected output length.
// If too short it should be extended with values from the reference output
final int[] expected = Arrays.copyOf(seed, Math.max(seedLength, length));
if (expected.length == 0) {
// Edge case for zero length
Assertions.assertArrayEquals(new int[0],
BaseProvider.extendSeed(seed, length));
continue;
}
// Extend the truncated seed using the reference output.
// This may be seeded with zero or the non-zero initial value.
final int[] source = expected[0] == 0 ? values0 : valuesA;
System.arraycopy(source, 0, expected, seedLength, expected.length - seedLength);
Assertions.assertArrayEquals(expected,
BaseProvider.extendSeed(seed, length),
() -> String.format("%d -> %d", seedLength, length));
}
}

/**
* Dummy class for checking the behaviorof the IntProvider. Tests:
* <ul>
@@ -86,6 +86,10 @@ behavioural compatibility between releases; derived types
may break behavioural compatibility. Any functional changes
will be recorded in the release notes.
">
<action dev="aherbert" type="add" issue="173">
"BaseProvider": Add a static method to extend input int[] and long[] seeds to a
minimum length.
</action>
<action dev="aherbert" type="update" issue="171">
Recuce the memory footprint of the cached boolean and int source for the IntProvider
and LongProvider. This change has a performance improvement on some JDKs.
@@ -101,6 +101,13 @@
<property name="violationSuppressXPath" value="//ClassOrInterfaceDeclaration[@SimpleName='TSampler']"/>
</properties>
</rule>
<rule ref="category/java/bestpractices.xml/AvoidReassigningParameters">
<properties>
<!-- Hash functions are optimised for minimum byte size to allow inlining -->
<property name="violationSuppressXPath"
value="./ancestor::MethodDeclaration[@Name='stafford13' or @Name='murmur3']"/>
</properties>
</rule>

<rule ref="category/java/codestyle.xml/ClassNamingConventions">
<properties>

0 comments on commit 34b22b3

Please sign in to comment.