/
ContinuousDistributionAbstractTest.java
502 lines (444 loc) · 19.5 KB
/
ContinuousDistributionAbstractTest.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.statistics.distribution;
import java.util.ArrayList;
import java.util.Collections;
import org.apache.commons.math3.analysis.UnivariateFunction;
import org.apache.commons.math3.analysis.integration.BaseAbstractUnivariateIntegrator;
import org.apache.commons.math3.analysis.integration.IterativeLegendreGaussIntegrator;
import org.apache.commons.rng.simple.RandomSource;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
/**
 * Abstract base class for {@link ContinuousDistribution} tests.
 * <p>
 * To create a concrete test class for a continuous distribution
 * implementation, first implement {@link #makeDistribution()} to return a
 * distribution instance to use in tests. Then implement each of the test data
 * generation methods below. In each case, the test points and test values
 * arrays returned represent parallel arrays of inputs and expected values for
 * the distribution returned by {@code makeDistribution()}. Default
 * implementations are provided for the makeInverseXxx methods that just invert
 * the mapping defined by the arrays returned by the makeCumulativeXxx methods,
 * and for {@link #makeLogDensityTestValues()}, which takes the logarithm of the
 * values returned by {@link #makeDensityTestValues()}.
 * <p>
 * makeCumulativeTestPoints() -- arguments used to test cumulative probabilities
 * makeCumulativeTestValues() -- expected cumulative probabilities
 * makeDensityTestValues() -- expected density values at cumulativeTestPoints
 * makeLogDensityTestValues() -- expected log density values at cumulativeTestPoints
 * makeInverseCumulativeTestPoints() -- arguments used to test inverse cdf
 * makeInverseCumulativeTestValues() -- expected inverse cdf values
 * <p>
 * To implement additional test cases with different distribution instances and
 * test data, use the setXxx methods for the instance data in test cases and
 * call the verifyXxx methods to verify results.
 * <p>
 * Error tolerance can be overridden by implementing {@link #getTolerance()}
 * or changed for a single test with {@link #setTolerance(double)}.
 * <p>
 * Distributions whose support is not connected must override
 * {@link #isSupportConnected()}.
 * <p>
 * Test data should be validated against reference tables or other packages
 * where possible, and the source of the reference data and/or validation
 * should be documented in the test cases. A framework for validating
 * distribution data against R is included in the /src/test/R source tree.
 * <p>
 * See {@link NormalDistributionTest} and {@link ChiSquaredDistributionTest}
 * for examples.
 */
abstract class ContinuousDistributionAbstractTest {

    //-------------------- Private test instance data -------------------------

    /** Distribution instance used to perform tests. */
    private ContinuousDistribution distribution;

    /** Tolerance used in comparing expected and returned values. */
    private double tolerance = 1e-4;

    /** Arguments used to test cumulative probability calculations. */
    private double[] cumulativeTestPoints;

    /** Expected values used to test cumulative probability calculations. */
    private double[] cumulativeTestValues;

    /** Arguments used to test inverse cumulative probability calculations. */
    private double[] inverseCumulativeTestPoints;

    /** Expected values used to test inverse cumulative probability calculations. */
    private double[] inverseCumulativeTestValues;

    /** Expected values used to test density calculations. */
    private double[] densityTestValues;

    /** Expected values used to test logarithmic density calculations. */
    private double[] logDensityTestValues;

    //-------------------- Abstract methods -----------------------------------

    /**
     * Creates the default continuous distribution instance to use in tests.
     *
     * @return the distribution under test
     */
    public abstract ContinuousDistribution makeDistribution();

    /**
     * Creates the default cumulative probability test input values.
     *
     * @return the points at which the cumulative probability is evaluated
     */
    public abstract double[] makeCumulativeTestPoints();

    /**
     * Creates the default cumulative probability test expected values.
     *
     * @return the expected cumulative probabilities, parallel to
     * {@link #makeCumulativeTestPoints()}
     */
    public abstract double[] makeCumulativeTestValues();

    /**
     * Creates the default density test expected values.
     *
     * @return the expected densities, parallel to {@link #makeCumulativeTestPoints()}
     */
    public abstract double[] makeDensityTestValues();

    /**
     * Creates the default logarithmic density test expected values.
     * The default implementation simply computes the logarithm
     * of each value returned by {@link #makeDensityTestValues()}.
     *
     * @return the expected log densities, parallel to {@link #makeCumulativeTestPoints()}
     */
    public double[] makeLogDensityTestValues() {
        final double[] density = makeDensityTestValues();
        final double[] logDensity = new double[density.length];
        for (int i = 0; i < density.length; i++) {
            logDensity[i] = Math.log(density[i]);
        }
        return logDensity;
    }

    //---- Default implementations of inverse test data generation methods ----

    /**
     * Creates the default inverse cumulative probability test input values.
     * The default implementation returns {@link #makeCumulativeTestValues()}.
     *
     * @return the probabilities at which the inverse cdf is evaluated
     */
    public double[] makeInverseCumulativeTestPoints() {
        return makeCumulativeTestValues();
    }

    /**
     * Creates the default inverse cumulative probability test expected values.
     * The default implementation returns {@link #makeCumulativeTestPoints()}.
     *
     * @return the expected inverse cdf values, parallel to
     * {@link #makeInverseCumulativeTestPoints()}
     */
    public double[] makeInverseCumulativeTestValues() {
        return makeCumulativeTestPoints();
    }

    //-------------------- Setup / tear down ----------------------------------

    /**
     * Setup sets all test instance data to default values.
     * <p>
     * This method is {@code @BeforeEach} (run before each test) as certain test
     * methods may wish to alter the defaults.
     */
    @BeforeEach
    void setUp() {
        distribution = makeDistribution();
        cumulativeTestPoints = makeCumulativeTestPoints();
        cumulativeTestValues = makeCumulativeTestValues();
        inverseCumulativeTestPoints = makeInverseCumulativeTestPoints();
        inverseCumulativeTestValues = makeInverseCumulativeTestValues();
        densityTestValues = makeDensityTestValues();
        logDensityTestValues = makeLogDensityTestValues();
    }

    /**
     * Cleans up test instance data.
     */
    @AfterEach
    void tearDown() {
        distribution = null;
        cumulativeTestPoints = null;
        cumulativeTestValues = null;
        inverseCumulativeTestPoints = null;
        inverseCumulativeTestValues = null;
        densityTestValues = null;
        logDensityTestValues = null;
    }

    //-------------------- Verification methods -------------------------------

    /**
     * Verifies that cumulative probability calculations match expected values
     * using current test instance data.
     * <p>
     * Both {@code cumulativeProbability(double)} and
     * {@code probability(double, double)} are checked. For every ordered pair of
     * test points where the first exceeds the second, {@code probability} is
     * expected to throw an {@link IllegalArgumentException}.
     */
    protected void verifyCumulativeProbabilities() {
        // verify cumulativeProbability(double)
        for (int i = 0; i < cumulativeTestPoints.length; i++) {
            TestUtils.assertEquals("Incorrect cumulative probability value returned for " +
                                   cumulativeTestPoints[i], cumulativeTestValues[i],
                                   distribution.cumulativeProbability(cumulativeTestPoints[i]),
                                   getTolerance());
        }
        // verify probability(double, double)
        for (int i = 0; i < cumulativeTestPoints.length; i++) {
            for (int j = 0; j < cumulativeTestPoints.length; j++) {
                // Final copies are required for capture by the lambda below.
                final double x0 = cumulativeTestPoints[i];
                final double x1 = cumulativeTestPoints[j];
                if (x0 <= x1) {
                    TestUtils.assertEquals("Incorrect probability value returned for (" +
                                           x0 + "," + x1 + ")",
                                           cumulativeTestValues[j] - cumulativeTestValues[i],
                                           distribution.probability(x0, x1),
                                           getTolerance());
                } else {
                    // Subclasses of IllegalArgumentException are accepted, as with the
                    // catch clause this assertion replaces.
                    Assertions.assertThrows(IllegalArgumentException.class,
                        () -> distribution.probability(x0, x1),
                        "distribution.probability(double, double) should have thrown an exception " +
                        "that the lower bound exceeds the upper bound: (" + x0 + "," + x1 + ")");
                }
            }
        }
    }

    /**
     * Verifies that inverse cumulative probability calculations match expected values
     * using current test instance data.
     */
    protected void verifyInverseCumulativeProbabilities() {
        for (int i = 0; i < inverseCumulativeTestPoints.length; i++) {
            TestUtils.assertEquals("Incorrect inverse cumulative probability value returned for " +
                                   inverseCumulativeTestPoints[i], inverseCumulativeTestValues[i],
                                   distribution.inverseCumulativeProbability(inverseCumulativeTestPoints[i]),
                                   getTolerance());
        }
    }

    /**
     * Verifies that density calculations match expected values
     * using current test instance data.
     */
    protected void verifyDensities() {
        for (int i = 0; i < cumulativeTestPoints.length; i++) {
            TestUtils.assertEquals("Incorrect probability density value returned for " +
                                   cumulativeTestPoints[i], densityTestValues[i],
                                   distribution.density(cumulativeTestPoints[i]),
                                   getTolerance());
        }
    }

    /**
     * Verifies that logarithmic density calculations match expected values
     * using current test instance data.
     */
    protected void verifyLogDensities() {
        for (int i = 0; i < cumulativeTestPoints.length; i++) {
            // The message names the log density so a failure here is not
            // mistaken for a failure of verifyDensities().
            TestUtils.assertEquals("Incorrect probability log density value returned for " +
                                   cumulativeTestPoints[i], logDensityTestValues[i],
                                   distribution.logDensity(cumulativeTestPoints[i]),
                                   getTolerance());
        }
    }

    //------------------------ Default test cases -----------------------------

    /**
     * Verifies that cumulative probability calculations match expected values
     * using default test instance data.
     */
    @Test
    void testCumulativeProbabilities() {
        verifyCumulativeProbabilities();
    }

    /**
     * Verifies that inverse cumulative probability calculations match expected values
     * using default test instance data.
     */
    @Test
    void testInverseCumulativeProbabilities() {
        verifyInverseCumulativeProbabilities();
    }

    /**
     * Verifies that density calculations return expected values
     * for default test instance data.
     */
    @Test
    void testDensities() {
        verifyDensities();
    }

    /**
     * Verifies that logarithmic density calculations return expected values
     * for default test instance data.
     */
    @Test
    void testLogDensities() {
        verifyLogDensities();
    }

    /**
     * Verifies that probability computations are consistent: {@code P(x, x) = 0}
     * and {@code P(a < X <= b) = P(X <= b) - P(X <= a)} for adjacent test points.
     */
    @Test
    void testConsistency() {
        // Use the overridable accessor so that a subclass tolerance is honoured,
        // in line with the verifyXxx methods.
        final double tol = getTolerance();
        for (int i = 1; i < cumulativeTestPoints.length; i++) {
            // check that cdf(x, x) = 0
            TestUtils.assertEquals(0d,
                                   distribution.probability(cumulativeTestPoints[i], cumulativeTestPoints[i]),
                                   tol);
            // check that P(a < X <= b) = P(X <= b) - P(X <= a)
            final double upper = Math.max(cumulativeTestPoints[i], cumulativeTestPoints[i - 1]);
            final double lower = Math.min(cumulativeTestPoints[i], cumulativeTestPoints[i - 1]);
            final double diff = distribution.cumulativeProbability(upper) -
                                distribution.cumulativeProbability(lower);
            final double direct = distribution.probability(lower, upper);
            TestUtils.assertEquals("Inconsistent probability for (" +
                                   lower + "," + upper + ")", diff, direct, tol);
        }
    }

    /**
     * Verifies that {@code probability(double, double)} rejects a lower bound
     * that is greater than the upper bound.
     */
    @Test
    void testPrecondition1() {
        Assertions.assertThrows(DistributionException.class, () -> distribution.probability(1, 0));
    }

    /**
     * Verifies that {@code inverseCumulativeProbability(double)} rejects a
     * probability below zero.
     */
    @Test
    void testPrecondition2() {
        Assertions.assertThrows(DistributionException.class, () -> distribution.inverseCumulativeProbability(-1));
    }

    /**
     * Verifies that {@code inverseCumulativeProbability(double)} rejects a
     * probability above one.
     */
    @Test
    void testPrecondition3() {
        Assertions.assertThrows(DistributionException.class, () -> distribution.inverseCumulativeProbability(2));
    }

    /**
     * Verifies density, log density and cumulative probability at points one
     * ulp below the support lower bound and one ulp above the support upper bound.
     */
    @Test
    void testOutsideSupport() {
        // Test various quantities when the variable is outside the support.
        final double lo = distribution.getSupportLowerBound();
        final double hi = distribution.getSupportUpperBound();
        final double below = lo - Math.ulp(lo);
        final double above = hi + Math.ulp(hi);

        Assertions.assertEquals(0d, distribution.density(below));
        Assertions.assertEquals(0d, distribution.density(above));

        Assertions.assertEquals(Double.NEGATIVE_INFINITY, distribution.logDensity(below));
        Assertions.assertEquals(Double.NEGATIVE_INFINITY, distribution.logDensity(above));

        Assertions.assertEquals(0d, distribution.cumulativeProbability(below));
        Assertions.assertEquals(1d, distribution.cumulativeProbability(above));
    }

    /**
     * Test sampling. A fixed-seed sample is binned by the distribution quartiles
     * and the bin counts are tested against a uniform expectation with a
     * chi-square test.
     */
    @Test
    void testSampler() {
        final int sampleSize = 1000;
        final ContinuousDistribution.Sampler sampler =
            distribution.createSampler(RandomSource.create(RandomSource.WELL_19937_C, 123456789L));
        final double[] sample = AbstractContinuousDistribution.sample(sampleSize, sampler);
        final double[] quartiles = TestUtils.getDistributionQuartiles(distribution);
        // Each quartile bin is expected to receive a quarter of the sample;
        // derived from sampleSize so the two cannot drift apart.
        final double quarter = sampleSize / 4.0;
        final double[] expected = {quarter, quarter, quarter, quarter};
        final long[] counts = new long[4];
        for (int i = 0; i < sampleSize; i++) {
            TestUtils.updateCounts(sample[i], counts, quartiles);
        }
        TestUtils.assertChiSquareAccept(expected, counts, 0.001);
    }

    /**
     * Verify that density integrals match the distribution.
     * The (filtered, sorted) cumulativeTestPoints array is used to source
     * integration limits. The integral of the density (estimated using a
     * Legendre-Gauss integrator) is compared with the cdf over the same
     * interval. Test points whose expected cumulative probability is NaN or
     * within 1e-5 of 0 or 1 are discarded.
     */
    @Test
    void testDensityIntegrals() {
        final double tol = 1e-9;
        final BaseAbstractUnivariateIntegrator integrator =
            new IterativeLegendreGaussIntegrator(5, 1e-12, 1e-10);
        final UnivariateFunction d = x -> distribution.density(x);
        final ArrayList<Double> integrationTestPoints = new ArrayList<>();
        for (int i = 0; i < cumulativeTestPoints.length; i++) {
            final double p = cumulativeTestValues[i];
            if (Double.isNaN(p) ||
                p < 1e-5 ||
                p > 1 - 1e-5) {
                continue; // exclude points in the extreme tails of the distribution.
            }
            integrationTestPoints.add(cumulativeTestPoints[i]);
        }
        Collections.sort(integrationTestPoints);
        for (int i = 1; i < integrationTestPoints.size(); i++) {
            // Read the lower limit inside the loop: the list may be empty, in
            // which case the loop body is never entered.
            final double lower = integrationTestPoints.get(0);
            final double upper = integrationTestPoints.get(i);
            Assertions.assertEquals(distribution.probability(lower, upper),
                                    integrator.integrate(1000000, // Triangle integrals are very slow to converge
                                                         d, lower, upper),
                                    tol,
                                    "Density integral does not match probability for (" +
                                    lower + "," + upper + ")");
        }
    }

    /**
     * Test if the distribution is support connected. This test exists to ensure the support
     * connected property is tested. This may be evaluated in the default implementation
     * of {@link AbstractContinuousDistribution#inverseCumulativeProbability(double)}
     * depending on the data points used to test the distribution (see
     * {@link #makeInverseCumulativeTestPoints()}). If this default method has been overridden
     * then the support connected property is not used elsewhere in the standard tests.
     */
    @Test
    void testIsSupportConnected() {
        Assertions.assertEquals(isSupportConnected(), distribution.isSupportConnected());
    }

    //------------------ Getters / Setters for test instance data -----------

    /**
     * @return Returns the cumulativeTestPoints.
     */
    protected double[] getCumulativeTestPoints() {
        return cumulativeTestPoints;
    }

    /**
     * @param cumulativeTestPoints The cumulativeTestPoints to set.
     */
    protected void setCumulativeTestPoints(double[] cumulativeTestPoints) {
        this.cumulativeTestPoints = cumulativeTestPoints;
    }

    /**
     * @return Returns the cumulativeTestValues.
     */
    protected double[] getCumulativeTestValues() {
        return cumulativeTestValues;
    }

    /**
     * @param cumulativeTestValues The cumulativeTestValues to set.
     */
    protected void setCumulativeTestValues(double[] cumulativeTestValues) {
        this.cumulativeTestValues = cumulativeTestValues;
    }

    /**
     * @return Returns the densityTestValues.
     */
    protected double[] getDensityTestValues() {
        return densityTestValues;
    }

    /**
     * @param densityTestValues The densityTestValues to set.
     */
    protected void setDensityTestValues(double[] densityTestValues) {
        this.densityTestValues = densityTestValues;
    }

    /**
     * @return Returns the distribution.
     */
    protected ContinuousDistribution getDistribution() {
        return distribution;
    }

    /**
     * @param distribution The distribution to set.
     */
    protected void setDistribution(ContinuousDistribution distribution) {
        this.distribution = distribution;
    }

    /**
     * @return Returns the inverseCumulativeTestPoints.
     */
    protected double[] getInverseCumulativeTestPoints() {
        return inverseCumulativeTestPoints;
    }

    /**
     * @param inverseCumulativeTestPoints The inverseCumulativeTestPoints to set.
     */
    protected void setInverseCumulativeTestPoints(double[] inverseCumulativeTestPoints) {
        this.inverseCumulativeTestPoints = inverseCumulativeTestPoints;
    }

    /**
     * @return Returns the inverseCumulativeTestValues.
     */
    protected double[] getInverseCumulativeTestValues() {
        return inverseCumulativeTestValues;
    }

    /**
     * @param inverseCumulativeTestValues The inverseCumulativeTestValues to set.
     */
    protected void setInverseCumulativeTestValues(double[] inverseCumulativeTestValues) {
        this.inverseCumulativeTestValues = inverseCumulativeTestValues;
    }

    /**
     * @return Returns the tolerance.
     */
    protected double getTolerance() {
        return tolerance;
    }

    /**
     * @param tolerance The tolerance to set.
     */
    protected void setTolerance(double tolerance) {
        this.tolerance = tolerance;
    }

    /**
     * The expected value for {@link ContinuousDistribution#isSupportConnected()}.
     * The default is {@code true}. Test class should override this when the distribution
     * is not support connected.
     *
     * @return Returns true if the distribution is support connected
     */
    protected boolean isSupportConnected() {
        return true;
    }
}