Skip to content
Permalink
Browse files
RNG-167: Sampling from a t-distribution
  • Loading branch information
aherbert committed Mar 2, 2022
1 parent 1d2722f commit 88608c23a424ce54fb96b51f1d677eeff35c1607
Showing 5 changed files with 311 additions and 0 deletions.
@@ -0,0 +1,220 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.rng.sampling.distribution;

import java.util.function.DoubleUnaryOperator;
import org.apache.commons.rng.UniformRandomProvider;

/**
* Sampling from a T distribution.
*
* <p>Uses Bailey's algorithm for t-distribution sampling:</p>
*
* <blockquote>
* <pre>
* Bailey, R. W. (1994)
* "Polar Generation of Random Variates with the t-Distribution."
* Mathematics of Computation 62, 779-781.
* </pre>
* </blockquote>
*
* <p>Sampling uses {@link UniformRandomProvider#nextLong()}.</p>
*
* @see <a href="https://en.wikipedia.org/wiki/Student%27s_t-distribution">Student's T distribution (wikipedia)</a>
* @see <a href="https://doi.org/10.2307/2153537">Mathematics of Computation, 62, 779-781</a>
* @since 1.5
*/
public abstract class TSampler implements SharedStateContinuousSampler {
/** Threshold for huge degrees of freedom. Above this value the CDF of the t distribution
* matches the normal distribution. Value is 2/eps (where eps is the machine epsilon)
* or approximately 9.0e15. */
private static final double HUGE_DF = 0x1.0p53;

/** Source of randomness. */
private final UniformRandomProvider rng;

/**
* Sample from a t-distribution using Bailey's algorithm.
*/
private static final class StudentsTSampler extends TSampler {
/** Threshold for large degrees of freedom. */
private static final double LARGE_DF = 25;
/** The multiplier to convert the least significant 53-bits of a {@code long} to a
* uniform {@code double}. */
private static final double DOUBLE_MULTIPLIER = 0x1.0p-53;

/** Degrees of freedom. */
private final double df;
/** Function to compute pow(x, -2/v) - 1, where v = degrees of freedom. */
private final DoubleUnaryOperator powm1;

/**
* @param rng Generator of uniformly distributed random numbers.
* @param v Degrees of freedom.
*/
StudentsTSampler(UniformRandomProvider rng,
double v) {
super(rng);
df = v;

// The sampler requires pow(w, -2/v) - 1 with
// 0 <= w <= 1; Expected(w) = sqrt(0.5).
// When the exponent is small then pow(x, y) -> 1.
// This affects large degrees of freedom.
final double exponent = -2 / v;
powm1 = v > LARGE_DF ?
x -> Math.expm1(Math.log(x) * exponent) :
x -> Math.pow(x, exponent) - 1;
}

/**
* @param rng Generator of uniformly distributed random numbers.
* @param source Source to copy.
*/
private StudentsTSampler(UniformRandomProvider rng,
StudentsTSampler source) {
super(rng);
df = source.df;
powm1 = source.powm1;
}

/** {@inheritDoc} */
@Override
public double sample() {
// Require u and v in [0, 1] and a random sign.
// Create u in (0, 1] to avoid generating nan
// from u*u/w (0/0) or r2*c2 (inf*0).
final double u = InternalUtils.makeNonZeroDouble(nextLong());
final double v = makeSignedDouble(nextLong());
final double w = u * u + v * v;
if (w > 1) {
// Rejection frequency = 1 - pi/4 = 0.215.
// Recursion will generate stack overflow given a broken RNG
// and avoids an infinite loop.
return sample();
}
// Sidestep a square-root calculation.
final double c2 = u * u / w;
final double r2 = df * powm1.applyAsDouble(w);
// Choose sign at random from the sign of v.
return Math.copySign(Math.sqrt(r2 * c2), v);
}

/** {@inheritDoc} */
@Override
public StudentsTSampler withUniformRandomProvider(UniformRandomProvider rng) {
return new StudentsTSampler(rng, this);
}

/**
* Creates a signed double in the range {@code [-1, 1)}. The magnitude is sampled evenly
* from the 2<sup>54</sup> dyadic rationals in the range.
*
* <p>Note: This method will not return samples for both -0.0 and 0.0.
*
* @param bits the bits
* @return the double
*/
private static double makeSignedDouble(long bits) {
// As per o.a.c.rng.core.utils.NumberFactory.makeDouble(long) but using a signed
// shift of 10 in place of an unsigned shift of 11.
// Use the upper 54 bits on the assumption they are more random.
// The sign bit is maintained by the signed shift.
// The next 53 bits generates a magnitude in the range [0, 2^53) or [-2^53, 0).
return (bits >> 10) * DOUBLE_MULTIPLIER;
}
}

/**
* Sample from a t-distribution using a normal distribution.
* This is used when the degrees of freedom is extremely large (e.g. {@code > 1e16}).
*/
private static final class NormalTSampler extends TSampler {
/** Underlying normalized Gaussian sampler. */
private final NormalizedGaussianSampler sampler;

/**
* @param rng Generator of uniformly distributed random numbers.
*/
NormalTSampler(UniformRandomProvider rng) {
super(rng);
this.sampler = ZigguratSampler.NormalizedGaussian.of(rng);
}

/** {@inheritDoc} */
@Override
public double sample() {
return sampler.sample();

}

/** {@inheritDoc} */
@Override
public NormalTSampler withUniformRandomProvider(UniformRandomProvider rng) {
return new NormalTSampler(rng);
}
}

/**
* @param rng Generator of uniformly distributed random numbers.
*/
TSampler(UniformRandomProvider rng) {
this.rng = rng;
}

/** {@inheritDoc} */
// Redeclare the signature to return a TSampler not a SharedStateContinuousSampler
@Override
public abstract TSampler withUniformRandomProvider(UniformRandomProvider rng);

/**
* Generates a {@code long} value.
* Used by algorithm implementations without exposing access to the RNG.
*
* @return the next random value
*/
long nextLong() {
return rng.nextLong();
}

/** {@inheritDoc} */
@Override
public String toString() {
return "Student's t deviate [" + rng.toString() + "]";
}

/**
* Create a new t distribution sampler.
*
* @param rng Generator of uniformly distributed random numbers.
* @param degreesOfFreedom Degrees of freedom.
* @return the sampler
* @throws IllegalArgumentException if {@code degreesOfFreedom <= 0}
*/
public static TSampler of(UniformRandomProvider rng,
double degreesOfFreedom) {
if (degreesOfFreedom > HUGE_DF) {
return new NormalTSampler(rng);
} else if (degreesOfFreedom > 0) {
return new StudentsTSampler(rng, degreesOfFreedom);
} else {
// df <= 0 or nan
throw new IllegalArgumentException(
"degrees of freedom is not strictly positive: " + degreesOfFreedom);
}
}
}
@@ -254,6 +254,21 @@ public final class ContinuousSamplersList {
final double dofT = 0.76543;
add(LIST, new org.apache.commons.math3.distribution.TDistribution(unusedRng, dofT),
RandomSource.ISAAC.create());
// T.
add(LIST, new org.apache.commons.math3.distribution.TDistribution(unusedRng, dofT),
TSampler.of(RandomSource.SFC_64.create(), dofT));
// T with 'large' degrees of freedom.
final double dofTlarge = 30;
add(LIST, new org.apache.commons.math3.distribution.TDistribution(unusedRng, dofTlarge),
TSampler.of(RandomSource.XO_SHI_RO_256_PP.create(), dofTlarge));
// T with 'huge' degrees of freedom (approaches a normal distribution).
// Deciles are computed incorrectly using Commons Math; values computed using Matlab.
// Note: DF is below the switch to using a normal distribution.
final double dofTHuge = 1e15;
add(LIST, new double[] {-1.2815515655446015, -0.84162123357291463, -0.52440051270804089,
-0.25334710313579983, 0, 0.25334710313579983, 0.52440051270804089, 0.84162123357291474,
1.2815515655446015, Double.POSITIVE_INFINITY},
TSampler.of(RandomSource.XO_SHI_RO_256_SS.create(), dofTHuge));

// Triangular ("inverse method").
final double aTriangle = -0.76543;
@@ -0,0 +1,67 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.rng.sampling.distribution;

import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.RandomAssert;
import org.apache.commons.rng.simple.RandomSource;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

/**
* Test for the {@link TSampler}.
*/
class TSamplerTest {
/**
* Test the constructor with an invalid degrees of freedom.
*/
@ParameterizedTest
@ValueSource(doubles = {0, -1, Double.NaN})
void testConstructorThrowsWithBadDegreesOfFreedom(double degreesOfFreedom) {
final UniformRandomProvider rng = RandomSource.SPLIT_MIX_64.create(0L);
Assertions.assertThrows(IllegalArgumentException.class,
() -> TSampler.of(rng, degreesOfFreedom));
}

/**
* Test the SharedStateSampler implementation.
*/
@ParameterizedTest
@ValueSource(doubles = {4.56, 1e16})
void testSharedStateSampler(double degreesOfFreedom) {
final UniformRandomProvider rng1 = RandomSource.SPLIT_MIX_64.create(0L);
final UniformRandomProvider rng2 = RandomSource.SPLIT_MIX_64.create(0L);
final TSampler sampler1 = TSampler.of(rng1, degreesOfFreedom);
final TSampler sampler2 = sampler1.withUniformRandomProvider(rng2);
RandomAssert.assertProduceSameSequence(sampler1, sampler2);
}

/**
* Test extremely large degrees of freedom is approximated using a normal distribution.
*/
@Test
void testExtremelyLargeDegreesOfFreedom() {
final UniformRandomProvider rng1 = RandomSource.SPLIT_MIX_64.create(0L);
final UniformRandomProvider rng2 = RandomSource.SPLIT_MIX_64.create(0L);
final double degreesOfFreedom = 1e16;
final ContinuousSampler sampler1 = TSampler.of(rng1, degreesOfFreedom);
final ContinuousSampler sampler2 = ZigguratSampler.NormalizedGaussian.of(rng2);
RandomAssert.assertProduceSameSequence(sampler1, sampler2);
}
}
@@ -81,6 +81,9 @@ re-run tests that fail, and pass the build if they succeed
within the allotted number of reruns (the test will be marked
as 'flaky' in the report).
">
<action dev="aherbert" type="add" issue="167">
New "TSampler" class to sample from Student's t-distribution.
</action>
<action dev="aherbert" type="fix" issue="166">
Update "LogNormalSampler" and "BoxMullerLogNormalSampler" to allow a negative mean for
the natural logarithm of the distribution values.
@@ -95,6 +95,12 @@
or @SimpleName='ExamplesSamplingCommand' or @SimpleName='UniformSamplingVisualCheckCommand']"/>
</properties>
</rule>
<rule ref="category/java/bestpractices.xml/AccessorClassGeneration">
<properties>
<!-- False positive -->
<property name="violationSuppressXPath" value="//ClassOrInterfaceDeclaration[@SimpleName='TSampler']"/>
</properties>
</rule>

<rule ref="category/java/codestyle.xml/ClassNamingConventions">
<properties>

0 comments on commit 88608c2

Please sign in to comment.