Skip to content

Commit

Permalink
STATISTICS-35: Poisson dist to use a Gaussian sampler for large mean
Browse files Browse the repository at this point in the history
  • Loading branch information
aherbert committed Oct 24, 2021
1 parent 8ea9f5b commit e48e870
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@

import org.apache.commons.numbers.gamma.RegularizedGamma;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.distribution.GaussianSampler;
import org.apache.commons.rng.sampling.distribution.PoissonSampler;
import org.apache.commons.rng.sampling.distribution.SharedStateContinuousSampler;
import org.apache.commons.rng.sampling.distribution.ZigguratSampler;

/**
* Implementation of the <a href="http://en.wikipedia.org/wiki/Poisson_distribution">Poisson distribution</a>.
Expand All @@ -30,6 +33,8 @@ public final class PoissonDistribution extends AbstractDiscreteDistribution {
private static final int DEFAULT_MAX_ITERATIONS = 10000000;
/** Default convergence criterion. */
private static final double DEFAULT_EPSILON = 1e-12;
/** Upper bound on the mean to use the PoissonSampler. */
private static final double MAX_MEAN = 0.5 * Integer.MAX_VALUE;
/** Mean of the distribution. */
private final double mean;
/** Maximum number of iterations for cumulative probability. */
Expand Down Expand Up @@ -180,6 +185,20 @@ public boolean isSupportConnected() {
@Override
public DiscreteDistribution.Sampler createSampler(final UniformRandomProvider rng) {
// Poisson distribution sampler.
return PoissonSampler.of(rng, mean)::sample;
// Large means are not supported.
// See STATISTICS-35.
final double mu = getMean();
if (mu < MAX_MEAN) {
return PoissonSampler.of(rng, mu)::sample;
}
// Switch to a Gaussian approximation.
// Use a 0.5 shift to round samples to the correct integer.
final SharedStateContinuousSampler s =
GaussianSampler.of(ZigguratSampler.NormalizedGaussian.of(rng),
mu + 0.5, Math.sqrt(mu));
return () -> {
final double x = s.sample();
return Math.max(0, (int) x);
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,3 @@ logpmf.values = \
-13.015480041503906, -12.483856201171875, -12.016883850097656,\
-11.800201416015625, -11.694816589355469, -11.6627197265625 ,\
-11.6627197265625

# Inaccurate with a large range
disable.pmf.sum = true
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Large mean (2^30 * 1.5, exceeds the supported range for the PoissonSampler)
parameters = 1610612736.0
# TODO: Not very accurate. Check other implementations.
tolerance.relative = 1e-5
mean = 1610612736
variance = 1610612736
lower = 0
# Reference values are from scipy stats poisson
cdf.points = \
1610452208, 1610492340, 1610532472, \
1610572604, 1610612736, 1610652868, \
1610693000, 1610733132, 1610773264
cdf.values = \
3.16704545277469494e-05, 1.34994771620792087e-03,\
2.27512432610545989e-02, 1.58660577269995412e-01,\
5.00007613750695579e-01, 8.41345237492141296e-01,\
9.77248787520595852e-01, 9.98649858900940601e-01,\
9.99968316351697473e-01
pmf.values = \
3.33458673185959287e-09, 1.10433086816004676e-07,\
1.34536580959954562e-06, 6.02939624178527286e-06,\
9.94064214999183118e-06, 6.02932724121602734e-06,\
1.34538633840926817e-06, 1.10449938853452861e-07,\
3.33599899955430242e-09
logpmf.values = \
-19.51891708374023438, -16.01885604858398438, -13.5188446044921875,\
-12.01886367797851562, -11.51887893676757812, -12.0188751220703125,\
-13.518829345703125 , -16.01870346069335938, -19.51849365234375
sf.values = \
9.99968329545472212e-01, 9.98650052283792111e-01,\
9.77248756738945401e-01, 8.41339422730004616e-01,\
4.99992386249304421e-01, 1.58654762507858649e-01,\
2.27512124794041967e-02, 1.35014109905945011e-03,\
3.16836483025270742e-05

0 comments on commit e48e870

Please sign in to comment.