/
randomTest.js
429 lines (387 loc) · 13.5 KB
/
randomTest.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
'use strict'
const assert = require('assert')
const crypto = require('../index')
const nacl = require('tweetnacl')
const test = require('tape')
/*
* We apply a psi test to various distributions related to our uniform
* sampler.
*
* The psi test, also known as the G-test, is similar to the chi^2
* test, and is preferred by everyone's favourite Bayesian polemicist
* Jaynes, who discusses it in Secs. 9.11.1--9.12 of his book on
* pp. 296--301.
*
* Scaled appropriately, the psi test statistic converges to a chi^2
* distribution as the number of samples grows without bound, so we
* can use standard tables of chi^2 critical values. I picked 100
* degrees of freedom because that's the highest number in the NIST's
* table:
*
* NIST/SEMATECH e-Handbook of Statistical Methods,
* Sec. 1.3.6.7.4: Critical Values of the Chi-Square Distribution,
* <https://www.itl.nist.gov/div898/handbook/eda/section3/eda3674.htm>,
* April 2012, retrieved 2018-07-23.
*
* We choose a significance level alpha = 0.01, meaning there is a 1%
* probability of spuriously reporting failure for any individual
* trial of a psi test. Since there are many tests in this suite, the
* probability of a spurious test _suite_ failure in n cases is
*
* 1 - Binom(0; n, alpha) = 1 - (1 - alpha)^n,
*
* which grows rapidly as n grows. To keep it smaller, at some cost
* in statistical power to detect errors, we further try each psi test
* for t trials and allow the test to pass as long as at least k
* trials pass, so that it is as if each test's spurious failure
* probability were
*
* 1 - \sum_{i=k}^t Binom(i; t, alpha).
*
* In the case of t = 2 trials of which k = 1 must pass, this is
* alpha^2, and the spurious probability failure n tests is
*
* 1 - (1 - alpha^2)^n.
*
* With fifty tests, this is 1 - (1 - 0.0001)^50 < .499%.
*
* NOTE: Some of these tests have the reverse sense: rather than being
* hypothesis tests trying to reject the null hypothesis of working
* code, they are hypothesis tests trying to reject the null
* hypothesis of _specific bugs_ which serve as alternative hypotheses
* for the ordinary tests, and as such their significance level is the
* complement of the _statistical power_ of the ordinary tests. We do
* this to test the tests for whether they actually test anything.
*
* I don't know what that statistical power is -- it may be easy to
* compute in some cases, but often it isn't known analytically even
* by Serious Statisticians with strings of letters after their names.
* Empirically, with dozens of runs, it seems to be high enough, and
* likely exceeds 99%.
*/
const NSAMPLES = 100000
const DF = 100
const CHI2_CRITICAL = 135.807
const NPASSES_MIN = 1
const NTRIALS = 2
function trials (t, name, ntrials, npassesMin, f) {
let npass = 0
let trial
for (trial = 0; npass < npassesMin && trial < ntrials; trial++) {
if (f()) {
npass++
}
}
t.ok(npass >= npassesMin, `${npass} of ${trial} ${name} trials`)
}
function psi (C, P, n) {
assert.strictEqual(DF, C.length)
assert.strictEqual(DF, P.length)
let psi = 0
for (let i = 0; i < DF; i++) {
if (C[i] === 0) {
continue
}
assert(C[i] > 0)
psi += C[i] * Math.log(C[i] / (n * P[i]))
}
psi *= 2
return psi
}
function psiTest (t, probability, sample) {
t.plan(1)
const P = new Float64Array(DF) // probability
for (let i = 0; i < DF; i++) {
P[i] = probability(i)
}
trials(t, 'psi', NTRIALS, NPASSES_MIN, () => {
const C = new Uint32Array(DF) // count
for (let s = 0; s < NSAMPLES; s++) {
C[sample()]++
}
return psi(C, P, NSAMPLES) <= CHI2_CRITICAL
})
}
function psiTestReject (t, probability, sample) {
t.plan(1)
const P = new Float64Array(DF) // probability
for (let i = 0; i < DF; i++) {
P[i] = probability(i)
}
trials(t, 'psi reject', NTRIALS, NTRIALS - NPASSES_MIN + 1, () => {
const C = new Uint32Array(DF) // count
for (let s = 0; s < NSAMPLES; s++) {
C[sample()]++
}
return psi(C, P, NSAMPLES) > CHI2_CRITICAL
})
}
// Like uniform, but with a bug: wrong shift.
function baduniform (n) {
if (typeof n !== 'number' || n % 1 !== 0 || n <= 0 || n > (2 ** 53)) {
throw new Error('Bound must be positive integer at most 2^53.')
}
const min = (2 ** 53) % n
let x
do {
const b = nacl.randomBytes(7)
const l32 = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 26)
const h21 = b[4] | (b[5] << 8) | ((b[6] & 0x1f) << 16)
x = (2 ** 32) * h21 + l32
} while (x < min)
return x % n
}
const ERRPAT = /Bound must be positive integer at most 2\^53\./
test('uniform() throws', (t) => {
t.plan(1)
t.throws(() => crypto.random.uniform(), ERRPAT)
})
test("uniform('foo') throws", (t) => {
t.plan(1)
t.throws(() => crypto.random.uniform('foo'), ERRPAT)
})
test('uniform(0) throws', (t) => {
t.plan(1)
t.throws(() => crypto.random.uniform(0), ERRPAT)
})
test('uniform(0.5) throws', (t) => {
t.plan(1)
t.throws(() => crypto.random.uniform(0.5), ERRPAT)
})
// round(2**53 + 1) = 2**53, but round(2**53 + 2) > 2**53
test('uniform(2**53 + 2) throws', (t) => {
t.plan(1)
t.throws(() => crypto.random.uniform((2 ** 53) + 2), ERRPAT)
})
test('uniform(1) yields 0', (t) => {
t.plan(1)
t.equal(0, crypto.random.uniform(1))
})
test('uniform(2**53) does not throw', (t) => {
t.plan(1)
t.doesNotThrow(() => crypto.random.uniform(2 ** 53))
})
test('uniform(DF) passes psi test for uniform distribution', (t) => {
psiTest(t, i => 1 / DF, () => crypto.random.uniform(DF))
})
// Empirically confirm that the psi test has enough statistical power
// to detect modulo bias in the above test.
test('uniform(2*DF + 1) % DF fails psi test for uniform distribution', (t) => {
psiTestReject(t, i => 1 / DF, () => crypto.random.uniform((2 * DF) + 1) % DF)
})
test('uniform(256) % DF passes psi test for modulo bias', (t) => {
psiTest(t, i => (Math.floor(256 / DF) + (i < 256 % DF)) / 256, () => {
return crypto.random.uniform(256) % DF
})
})
// Why do we test the bits? Because I made this error:
//
// const l32 = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 26)
//
// And it was _not_ caught by the above uniform tests.
test('bits [24..32) of bad uniform fail psi test for uniform distribution', (t) => {
psiTestReject(t, i => (Math.floor(256 / DF) + (i < 256 % DF)) / 256, () => {
return (0xff & Math.floor(baduniform(2 ** 53) / (2 ** 24))) % DF
})
})
for (let b = 0; b < 53 - 8; b += 8) {
test(`bits [${b}..${b + 8}) of uniform(2**53) pass psi test for uniform distribution`, (t) => {
psiTest(t, i => (Math.floor(256 / DF) + (i < 256 % DF)) / 256, () => {
const x = crypto.random.uniform(2 ** 53)
return (0xff & Math.floor(x / (2 ** b))) % DF
})
})
}
test(`bits [45..53) of uniform(2**53) pass psi test for uniform distribution`, (t) => {
psiTest(t, i => (Math.floor(256 / DF) + (i < 256 % DF)) / 256, () => {
return (0xff & Math.floor(crypto.random.uniform(2 ** 53) / (2 ** 45))) % DF
})
})
// Like uniform_01, but limited to binary16 numbers with 11 bits of
// precision.
function uniform_01_lowprec () { // eslint-disable-line camelcase
function uniform16 () {
const b = nacl.randomBytes(2)
return (b[0] | (b[1] << 8)) >>> 0
}
// Draw an exponent with geometric distribution. Here emin = -14,
// so 16 bits is plenty.
const e = Math.clz32(uniform16()) - 16
// Draw normal odd 16-bit significand with uniform distribution.
const s0 = (uniform16() | 0x8001) >>> 0
// Round to an 11-bit significand in [2^15, 2^16], yielding a
// significand that is a multiple of 2^(16 - 11) = 2^5.
const hack = 3 * (2 ** (16 - 11 + 53 - 2))
const s = (s0 + hack) - hack
// Scale into [1/2, 1] and apply the exponent.
return s * (2 ** (-16 - e))
}
// Like uniform_01_lowprec, but with a bug: numbers <2^-11 excluded,
// as if you used the naive approach for sampling IEEE 754-2008
// binary16 numbers in [0,1] that many people use for binary64
// numbers.
function baduniform_01_lowprec () { // eslint-disable-line camelcase
return crypto.random.uniform(2 ** 11) / (2 ** 11)
}
// Like uniform_01, but with a bug: wrong shift amount.
function baduniform_01_badshift () { // eslint-disable-line camelcase
function uniform32 () {
const b = nacl.randomBytes(4)
return (b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 25)) >>> 0
}
// Draw an exponent with geometric distribution.
let e = 0
let x
while (e < 1088) {
if ((x = uniform32()) !== 0) {
break
}
e += 32
}
e += Math.clz32(x)
// Draw normal odd 64-bit significand with uniform distribution.
const hi = (uniform32() | 0x80000000) >>> 0
const lo = (uniform32() | 0x00000001) >>> 0
// Assemble parts into [2^63, 2^64) with uniform distribution.
// Using an odd low part breaks ties in the rounding, which should
// occur only in a set of measure zero.
const s = hi * (2 ** 32) + lo
// Scale into [1/2, 1) and apply the exponent.
return s * (2 ** (-64 - e))
}
function reject (x0, f) {
let x
do {
x = f()
} while (x === x0)
return x
}
// It had better appear uniformly distributed to psi. The
// distribution is not exact, but the error of each bucket's
// probability from 1/DF is so small it is insignificant here.
test('uniform_01() passes psi test for uniformly spaced buckets', (t) => {
psiTest(t, i => 1 / DF, () => {
return Math.floor(reject(1, crypto.random.uniform_01) * DF)
})
})
// dist16[i] = Pr[i/100 <= min(fp16(U), 99) < (i + 1)/100], where
// fp16(U) is the standard rounding to a binary16 floating-point
// number of a uniform random real in [0,1].
const dist16 = [
9.993438720703124e-3, 9.993438720703124e-3,
0.0100048828125, 0.00998199462890625,
0.0100048828125, 0.0100048828125,
0.0099591064453125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.009913330078125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.00994384765625, 0.0098828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0100048828125,
0.0100048828125, 0.0098828125,
0.0098828125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.010126953125, 0.0098828125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.010126953125,
0.0098828125, 0.01037109375
]
// The low-precision variant had better appear uniformly distributed
// to psi, at least as uniform as binary16 floating-point arithmetic
// can be, which is nonuniform enough we need to compute it more
// precisely in the dist16 table.
test('uniform_01_lowprec() passes psi test for uniformly spaced buckets', (t) => {
psiTest(t, i => dist16[i], () => {
const x = uniform_01_lowprec()
for (let i = 0; i < DF; i++) {
if (x < (i + 1) / DF) {
return i
}
}
assert(x === 1)
return DF - 1
})
})
// Test that in 100000 samples we get at least one nonzero sample
// below 2^-11 is somewhere between 2^-11 and 2^-12, say 2^-12 to be
// conservative; the probability of a sample failing this criterion is
// then at most 1 - 2^-12; the probability of _all_ samples failing
// this criterion, i.e. a spurious failure of the test, is at most
//
// (1 - 2^-12)^100000 ~= 2 * 10^-11 < 10^-10.
//
// This is not zero, but it's close enough for a test suite like this,
// and far below the spurious failure probability of 0.0001 for other
// tests here!
test('uniform_01_lowprec() passes small number test', (t) => {
t.plan(1)
trials(t, 'small number', NTRIALS, NPASSES_MIN, () => {
for (let i = 0; i < NSAMPLES; i++) {
const x = uniform_01_lowprec()
if (x > 0 && x < 2 ** -11) {
return true
}
}
return false
})
})
// baduniform_01_lowprec() may return 0, but it will never return
// anything 0 < x < 2^-11.
test('baduniform_01_lowprec() fails small number test', (t) => {
t.plan(1)
trials(t, 'small number', NTRIALS, NTRIALS - NPASSES_MIN + 1, () => {
for (let i = 0; i < NSAMPLES; i++) {
const x = baduniform_01_lowprec()
if (x > 0 && x < 2 ** -11) {
return false
}
}
return true
})
})
// If we discard some bits of the full-precision uniform_01(), it
// should continue to pass psi.
test('(uniform_01()*64)%1 passes psi test for uniformly spaced buckets', (t) => {
psiTest(t, i => 1 / DF, () => {
return Math.floor(((reject(1, crypto.random.uniform_01) * 64) % 1) * DF)
})
})
// Another pathology.
test('baduniform_01_badshift() fails psi test for uniformly spaced buckets', (t) => {
psiTestReject(t, i => 1 / DF, () => {
return Math.floor(reject(1, baduniform_01_badshift) * DF)
})
})