/
stoc3.cpp
1267 lines (1094 loc) · 47.9 KB
/
stoc3.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*************************** stoc3.cpp **********************************
* Author: Agner Fog
* Date created: 2002-10-02
* Last modified: 2008-11-21
* Project: stocc.zip
* Source URL: www.agner.org/random
*
* Description:
* Non-uniform random number generator functions.
*
* This file contains source code for the class StochasticLib3 derived
* from StochasticLib1 or StochasticLib2, defined in stocc.h.
*
* This class implements methods for sampling from the noncentral and extended
* hypergeometric distributions, as well as the multivariate versions of these.
*
* Documentation:
* ==============
* The file stocc.h contains class definitions.
* The file stocc.htm contains further instructions.
* The file nchyp.pdf, available from www.agner.org/random/theory contains
* theoretical description of Wallenius' and Fisher's noncentral hypergeometric
* distributions and the methods used in this code to sample from these.
* The file ran-instructions.pdf contains general instructions.
*
* Copyright 2002-2008 by Agner Fog.
* GNU General Public License http://www.gnu.org/licenses/gpl.html
*****************************************************************************/
#include <string.h> // memcpy function
#include "stocc.h" // class definitions
//#include "wnchyppr.cpp" // calculate Wallenius noncentral hypergeometric probability
//#include "fnchyppr.cpp" // calculate Fisher's noncentral hypergeometric probability
/******************************************************************************
Methods for class StochasticLib3
******************************************************************************/
/***********************************************************************
Constructor
***********************************************************************/
StochasticLib3::StochasticLib3(int seed) : StochasticLib1(seed) {
SetAccuracy(1.E-8); // set default accuracy
// Initialize variables
fnc_n_last = -1, fnc_m_last = -1, fnc_N_last = -1;
fnc_o_last = -1.;
wnc_n_last = -1, wnc_m_last = -1, wnc_N_last = -1;
wnc_o_last = -1.;
}
/***********************************************************************
SetAccuracy
***********************************************************************/
void StochasticLib3::SetAccuracy(double accur) {
// define accuracy of calculations for
// WalleniusNCHyp and MultiWalleniusNCHyp
if (accur < 0.) accur = 0.;
if (accur > 0.01) accur = 0.01;
accuracy = accur;
}
/***********************************************************************
Wallenius Non-central Hypergeometric distribution
***********************************************************************/
int32_t StochasticLib3::WalleniusNCHyp (int32_t n, int32_t m, int32_t N, double odds) {
/*
This function generates a random variate with Wallenius noncentral
hypergeometric distribution.
Wallenius noncentral hypergeometric distribution is the distribution you
get when drawing balls without replacement from an urn containing red and
white balls, with bias.
We define the weight of the balls so that the probability of taking a
particular ball is proportional to its weight. The value of odds is the
normalized odds ratio: odds = weight(red) / weight(white).
If all balls have the same weight, i.e. odds = 1, then we get the
hypergeometric distribution.
n is the number of balls you take,
m is the number of red balls in the urn,
N is the total number of balls in the urn,
odds is the odds ratio,
and the return value is the number of red balls you get.
Four different calculation methods are implemented. This function decides
which method to use, based on the parameters.
*/
// check parameters
if (n >= N || m >= N || n <= 0 || m <= 0 || odds <= 0.) {
// trivial cases
if (n == 0 || m == 0) return 0;
if (m == N) return n;
if (n == N) return m;
if (odds == 0.) {
if (n > N-m) FatalError("Not enough items with nonzero weight in function WalleniusNCHyp");
return 0;}
// illegal parameter
FatalError("Parameter out of range in function WalleniusNCHyp");}
if (odds == 1.) {
// use hypergeometric function if odds == 1
return Hypergeometric(n, m, N);}
if (n < 30) {
return WalleniusNCHypUrn(n, m, N, odds);}
if (double(n)*N < 10000) {
return WalleniusNCHypTable(n, m, N, odds);}
return WalleniusNCHypRatioOfUnifoms(n, m, N, odds);
// the decision to use NoncentralHypergeometricInversion is
// taken inside WalleniusNCHypRatioOfUnifoms based
// on the calculated variance.
}
/***********************************************************************
Subfunctions for WalleniusNCHyp
***********************************************************************/
int32_t StochasticLib3::WalleniusNCHypUrn (int32_t n, int32_t m, int32_t N, double odds) {
// sampling from Wallenius noncentral hypergeometric distribution
// by simulating urn model
int32_t x; // sample
int32_t m2; // items of color 2 in urn
double mw1, mw2; // total weight of balls of color 1 or 2
x = 0; m2 = N - m;
mw1 = m * odds; mw2 = m2;
do {
if (Random() * (mw1 + mw2) < mw1) {
x++; m--;
if (m == 0) break;
mw1 = m * odds;
}
else {
m2--;
if (m2 == 0) {
x += n-1; break;
}
mw2 = m2;
}
}
while (--n);
return x;
}
int32_t StochasticLib3::WalleniusNCHypTable (int32_t n, int32_t m, int32_t N, double odds) {
// Sampling from Wallenius noncentral hypergeometric distribution
// using chop-down search from a table created by recursive calculation.
// This method is fast when n is low or when called repeatedly with
// the same parameters.
int32_t x2; // upper x limit for table
int32_t x; // sample
double u; // uniform random number
int success; // table long enough
if (n != wnc_n_last || m != wnc_m_last || N != wnc_N_last || odds != wnc_o_last) {
// set-up: This is done only when parameters have changed
wnc_n_last = n; wnc_m_last = m; wnc_N_last = N; wnc_o_last = odds;
CWalleniusNCHypergeometric wnch(n,m,N,odds); // make object for calculation
success = wnch.MakeTable(wall_ytable, WALL_TABLELENGTH, &wall_x1, &x2); // make table of probability values
if (success) {
wall_tablen = x2 - wall_x1 + 1; // table long enough. remember length
}
else {
wall_tablen = 0; // remember failure
}
}
if (wall_tablen == 0) {
// table not long enough. Use another method
return WalleniusNCHypRatioOfUnifoms(n,m,N,odds);
}
while (1) { // repeat in the rare case of failure
u = Random(); // uniform variate to convert
for (x=0; x<wall_tablen; x++) { // chop-down search
u -= wall_ytable[x];
if (u < 0.) return x + wall_x1; // value found
}
}
}
int32_t StochasticLib3::WalleniusNCHypRatioOfUnifoms (int32_t n, int32_t m, int32_t N, double odds) {
// sampling from Wallenius noncentral hypergeometric distribution
// using ratio-of-uniforms rejection method.
int32_t xmin, xmax; // x limits
double mean; // mean
double variance; // variance
double x; // real sample
int32_t xi; // integer sample
int32_t x2; // limit when searching for mode
double u; // uniform random
double f, f2; // probability function value
double s123; // components 1,2,3 of hat width
double s4; // component 4 of hat width
double r1, r2; // temporaries
static const double rsqrt2pi = 0.3989422804014326857; // 1/sqrt(2*pi)
// Make object for calculating mean and probability.
CWalleniusNCHypergeometric wnch(n, m, N, odds, accuracy);
xmin = m+n-N; if (xmin < 0) xmin = 0; // calculate limits
xmax = n; if (xmax > m) xmax = m;
if (n != wnc_n_last || m != wnc_m_last || N != wnc_N_last || odds != wnc_o_last) {
// set-up: This is done only when parameters have changed
wnc_n_last = n; wnc_m_last = m; wnc_N_last = N; wnc_o_last = odds;
// find approximate mean
mean = wnch.mean();
// find approximate variance from Fisher's noncentral hypergeometric approximation
r1 = mean * (m-mean); r2 = (n-mean)*(mean+N-n-m);
variance = N*r1*r2/((N-1)*(m*r2+(N-m)*r1));
UseChopDown = variance < 4.; // use chop-down method if variance is low
if (!UseChopDown) {
// find mode (same code in CWalleniusNCHypergeometric::mode)
wnc_mode = (int32_t)(mean); f2 = 0.;
if (odds < 1.) {
if (wnc_mode < xmax) wnc_mode++;
x2 = xmin;
if (odds > 0.294 && N <= 10000000) {
x2 = wnc_mode - 1;} // search for mode can be limited
for (xi = wnc_mode; xi >= x2; xi--) {
f = wnch.probability(xi);
if (f <= f2) break;
wnc_mode = xi; f2 = f;
}
}
else {
if (wnc_mode < xmin) wnc_mode++;
x2 = xmax;
if (odds < 3.4 && N <= 10000000) {
x2 = wnc_mode + 1;} // search for mode can be limited
for (xi = wnc_mode; xi <= x2; xi++) {
f = wnch.probability(xi);
if (f <= f2) break;
wnc_mode = xi; f2 = f;
}
}
wnc_k = f2; // value at mode
// find approximate variance from normal distribution approximation
variance = rsqrt2pi / wnc_k; variance *= variance;
// find center and width of hat function
wnc_a = mean + 0.5;
s123 = 0.40 + 0.8579*sqrt(variance+0.5) + 0.4*fabs(mean-wnc_mode);
s4 = 0.;
r1 = xmax - mean - s123; r2 = mean - s123 - xmin;
if (r1 > r2) r1 = r2;
if ((odds>5. || odds<0.2) && r1>=-0.5 && r1<=8.) {
// s4 correction needed
if (r1 < 1.) r1 = 1.;
s4 = 0.029 * pow(double(N),0.23) / (r1*r1);
}
wnc_h = 2. * (s123 + s4);
// find safety bounds
wnc_bound1 = (int32_t)(mean - 4. * wnc_h);
if (wnc_bound1 < xmin) wnc_bound1 = xmin;
wnc_bound2 = (int32_t)(mean + 4. * wnc_h);
if (wnc_bound2 > xmax) wnc_bound2 = xmax;
}
}
if (UseChopDown) { // for small variance, use chop down inversion
return WalleniusNCHypInversion(n,m,N,odds);
}
// use ratio-of-uniforms rejection method
while(1) { // rejection loop
u = Random();
if (u == 0.) continue; // avoid division by 0
x = wnc_a + wnc_h * (Random()-0.5)/u;
if (x < 0. || x > 2E9) continue; // reject, avoid overflow
xi = (int32_t)(x); // truncate
if (xi < wnc_bound1 || xi > wnc_bound2) {
continue; // reject if outside safety bounds
}
#if 0 // use rejection in x-domain
if (xi == wnc_mode) break; // accept
f = wnch.probability(xi); // function value
if (f > wnc_k * u * u) {
break; // acceptance
}
#else // use rejection in t-domain (this is faster)
double hx, s2, xma2; // compute h(x)
s2 = wnc_h * 0.5; s2 *= s2;
xma2 = xi - (wnc_a-0.5);
xma2 *= xma2;
hx = (s2 >= xma2) ? 1. : s2 / xma2;
// rejection in t-domain implemented in CWalleniusNCHypergeometric::BernouilliH
if (wnch.BernouilliH(xi, hx * wnc_k * 1.01, u * u * wnc_k * 1.01, this)) {
break; // acceptance
}
#endif
} // rejection
return xi;
}
int32_t StochasticLib3::WalleniusNCHypInversion (int32_t n, int32_t m, int32_t N, double odds) {
// sampling from Wallenius noncentral hypergeometric distribution
// using down-up search starting at the mean using the chop-down technique.
// This method is faster than the rejection method when the variance is low.
int32_t wall_x1, x2; // search values
int32_t xmin, xmax; // x limits
double u; // uniform random number to be converted
double f; // probability function value
double accura; // absolute accuracy
int updown; // 1 = search down, 2 = search up, 3 = both
// Make objects for calculating mean and probability.
// It is more efficient to have two identical objects, one for down search
// and one for up search, because they are obtimized for consecutive x values.
CWalleniusNCHypergeometric wnch1(n, m, N, odds, accuracy);
CWalleniusNCHypergeometric wnch2(n, m, N, odds, accuracy);
accura = accuracy * 0.01;
if (accura > 1E-7) accura = 1E-7; // absolute accuracy
wall_x1 = (int32_t)(wnch1.mean()); // start at floor and ceiling of mean
x2 = wall_x1 + 1;
xmin = m+n-N; if (xmin<0) xmin = 0; // calculate limits
xmax = n; if (xmax>m) xmax = m;
updown = 3; // start searching both up and down
while(1) { // loop until accepted (normally executes only once)
u = Random(); // uniform random number to be converted
while (updown) { // search loop
if (updown & 1) { // search down
if (wall_x1 < xmin) {
updown &= ~1;} // stop searching down
else {
f = wnch1.probability(wall_x1);
u -= f; // subtract probability until 0
if (u <= 0.) return wall_x1;
wall_x1--;
if (f < accura) updown &= ~1; // stop searching down
}
}
if (updown & 2) { // search up
if (x2 > xmax) {
updown &= ~2; // stop searching up
}
else {
f = wnch2.probability(x2);
u -= f; // subtract probability until 0
if (u <= 0.) return x2;
x2++;
if (f < accura) updown &= ~2; // stop searching down
}
}
}
}
}
/***********************************************************************
Multivariate Wallenius noncentral hypergeometric distribution
***********************************************************************/
void StochasticLib3::MultiWalleniusNCHyp (int32_t * destination,
int32_t * source, double * weights, int32_t n, int colors) {
/*
This function generates a vector of random variables with the
multivariate Wallenius noncentral hypergeometric distribution.
The multivariate Wallenius noncentral hypergeometric distribution is
the distribution you get when drawing colored balls from an urn
with any number of colors, without replacement, and with bias.
The weights are defined so that the probability of taking a particular
ball is proportional to its weight.
Parameters:
destination: An output array to receive the number of balls of each
color. Must have space for at least 'colors' elements.
source: An input array containing the number of balls of each
color in the urn. Must have 'colors' elements.
All elements must be non-negative.
weights: The odds of each color. Must have 'colors' elements.
All elements must be non-negative.
n: The number of balls to draw from the urn.
Cannot exceed the total number of balls with nonzero weight
in source.
colors: The number of possible colors.
MAXCOLORS (defined in stocc.h): You may adjust MAXCOLORS to the maximum
number of colors you need.
The function will reduce the number of colors, if possible, by eliminating
colors with zero weight or zero number and pooling together colors with the
same weight. The problem thus reduced is handled in the arrays osource,
urn, oweights and osample of size colors2.
The sampling proceeds by either of two methods: simulating urn experiment,
or conditional method followed by Metropolis-Hastings sampling.
Simulating the urn experiment is simply taking one ball at a time, requiring
n uniform random variates. The problem is reduced whenever a color has been
exhausted.
The conditional method divides the colors into groups where the number of
balls in each group is determined by sampling from the marginal distribution
which is approximated by the univariate Wallenius distribution. Each group
is then subdivided by sampling one color at a time until all colors have
been sampled.
The sample from the conditional method does not have the exact distribution,
but it is used as a starting point for the Metropolis-Hastings sampling,
which proceeds as follows: colors c1 and c2 are re-sampled using the
univariate Wallenius distribution, keeping the samples of all other colors
constant. The new sample is accepted or the old sample retained, according
to the Metropolis formula which corrects for the slight error introduced
by not using the true conditional distribution. c1 and c2 are rotated in
an order determined by the variance of each color. This rotation (scan) is
repeated nHastings times.
*/
// variables
int order1[MAXCOLORS]; // sort order, index into source and destination
int order2[MAXCOLORS]; // corresponding index into arrays when equal weights pooled together
int order3[MAXCOLORS]; // secondary index for sorting by variance
int32_t osource[MAXCOLORS]; // contents of source, sorted by weight with equal weights pooled together
int32_t urn[MAXCOLORS]; // balls from osource not taken yet
int32_t osample[MAXCOLORS]; // balls sampled
double oweights[MAXCOLORS]; // sorted list of weights
double wcum[MAXCOLORS]; // list of accumulated probabilities
double var[MAXCOLORS]; // sorted list of variance
double w = 0.; // weight of balls of one color
double w1, w2; // odds within group; mean weight in group
double wsum; // total weight of all balls of several or all colors
double p; // probability
double f0, f1; // multivariate probability function
double g0, g1; // conditional probability function
double r1, r2; // temporaries in calculation of variance
int32_t nn; // number of balls left to sample
int32_t m; // number of balls of one color
int32_t msum; // total number of balls of several or all colors
int32_t N; // total number of balls with nonzero weight
int32_t x0, x = 0; // sample of one color
int32_t n1, n2, ng; // size of weight group sample or partial sample
int32_t m1, m2; // size of weight group
int i, j, k; // loop counters
int c, c1, c2; // color index
int colors2; // reduced number of colors
int a, b; // color index delimiting weight group
int nHastings; // number of scans in Metropolis-Hastings sampling
// check validity of parameters
if (n < 0 || colors < 0 || colors > MAXCOLORS) FatalError("Parameter out of range in function MultiWalleniusNCHyp");
if (colors == 0) return;
if (n == 0) {
for (i=0; i<colors; i++) destination[i] = 0; return;
}
// check validity of array parameters
for (i=0, msum=0; i < colors; i++) {
m = source[i]; w = weights[i];
if (m < 0 || w < 0) FatalError("Parameter negative in function MultiWalleniusNCHyp");
if (w) msum += m;
}
N = msum;
// sort colors by weight, heaviest first
for (i=0; i < colors; i++) order1[i] = order3[i] = i;
for (i=0; i < colors-1; i++) {
c = order1[i]; k = i;
w = weights[c];
if (source[c]==0) w = 0; // zero number treated as zero weight
for (j=i+1; j < colors; j++) {
c2 = order1[j];
if (weights[c2] > w && source[c2]) {
w = weights[c2]; k = j;
}
}
order1[i] = order1[k]; order1[k] = c;
}
// skip any colors with zero weight or zero number.
// this solves all problems with zero weights
while (colors && (weights[c=order1[colors-1]]==0 || source[c]==0)) {
colors--; destination[c] = 0;
}
// check if there are more than n balls with nonzero weight
if (n >= N) {
if (n > N) FatalError("Taking more items than there are in function MultiWalleniusNCHyp");
for (i = 0; i < colors; i++) {c = order1[i]; destination[c] = source[c];}
return;
}
// copy source and weights into ordered lists
// and pool together colors with same weight
for (i=0, c2=-1; i < colors; i++) {
c = order1[i];
if (i==0 || weights[c] != w) {
c2++;
x = source[c];
oweights[c2] = w = weights[c];
}
else {
x += source[c]; // join colors with same weight
}
urn[c2] = osource[c2] = x;
order2[i] = c2;
osample[c2] = 0;
}
colors2 = c2 + 1;
// check number of colors left
if (colors2 < 3) {
// simple cases
if (colors2 == 1) osample[0] = n;
if (colors2 == 2) {
x = WalleniusNCHyp(n, osource[0], N, oweights[0]/oweights[1]);
osample[0] = x; osample[1] = n - x;
}
}
else {
// more than 2 colors
nn = n;
// decide which method to use
if (nn < 5000 * colors2) {
// Simulate urn experiment
// Make list of accumulated probabilities of each color
for (i=0, wsum=0; i < colors2; i++) {
wsum += urn[i] * oweights[i];
wcum[i] = wsum;
}
// take one item nn times
j = colors2-1;
do {
// get random color according to probability distribution wcum
p = Random() * wcum[colors2-1];
// get color from search in probability distribution wcum
for (i=0; i < j; i++) {
if (p < wcum[i]) break;
}
// sample one ball of color i
osample[i]++; urn[i]--; nn--;
// check if this color has been exhausted
if (urn[i] == 0) {
if (i != j) {
// put exhausted color at the end of lists so that colors2 can be reduced
m = osource[i]; osource[i] = osource[j]; osource[j] = m;
m = urn[i]; urn[i] = urn[j]; urn[j] = m;
m = osample[i]; osample[i] = osample[j]; osample[j] = m;
w = oweights[i]; oweights[i] = oweights[j]; oweights[j] = w;
// update order2 list (no longer sorted by weight)
for (k=0; k<colors; k++) {
if (order2[k] == i) order2[k] = j; else
if (order2[k] == j) order2[k] = i;
}
}
colors2--; j = colors2-1; // decrement number of colors left in urn
if (colors2 == 2 && nn > 50) {
// two colors left. use univariate distribution for the rest
x = WalleniusNCHyp(nn, urn[0], urn[0]+urn[1], oweights[0]/oweights[1]);
osample[0] += x;
osample[1] += nn - x;
break;}
if (colors2 == 1) {
// only one color left. The rest is deterministic
osample[0] += nn;
break;
}
// make sure wcum is re-calculated from beginning
i = 0;
}
// update list of accumulated probabilities
wsum = i > 0 ? wcum[i-1] : 0.;
for (k=i; k<colors2; k++) {
wsum += urn[k] * oweights[k];
wcum[k] = wsum;
}
}
while (nn);
}
else {
// use conditional method to make starting point for
// Metropolis-Hastings sampling
// divide weights into two groups, heavy and light
a = 0; b = colors2-1;
w = sqrt(oweights[0] * oweights[colors2-1]);
do {
c = (a + b) / 2;
if (oweights[c] > w) a = c; else b = c;
}
while (b > a + 1);
// heavy group goes from 0 to b-1, light group goes from b to colors2-1
// calculate mean weight for heavy color group
for (i=0, m1=0, wsum=0; i < b; i++) {
m1 += urn[i]; wsum += oweights[i] * urn[i];
}
w1 = wsum / m1;
// calculate mean weight for light color group
for (i=b, m2=0, wsum=0; i < colors2; i++) {
m2 += urn[i]; wsum += oweights[i] * urn[i];
}
w2 = wsum / m2;
// split partial sample n into heavy (n1) and light (n2)
n1 = WalleniusNCHyp(n, m1, m1+m2, w1/w2);
n2 = n - n1;
// set parameters for first group (heavy)
a = 0; ng = n1;
// loop twice, for the two groops
for (k=0; k < 2; k++) {
// split group into single colors by calling univariate distribution b-a-1 times
for (i = a; i < b-1; i++) {
m = urn[i]; w = oweights[i];
// calculate mean weight of remaining colors
for (j=i+1, msum=0, wsum=0; j < b; j++) {
m1 = urn[j]; w1 = oweights[j];
msum += m1; wsum += m1 * w1;
}
// sample color i in group
x = wsum ? WalleniusNCHyp(ng, m, msum + m, w * msum / wsum) : ng;
osample[i] = x;
ng -= x;
}
// get the last one in the group
osample[i] = ng;
// set parameters for second group (light)
a = b; b = colors2; ng = n2;
}
// finished with conditional method.
// osample contains starting point for Metropolis-Hastings sampling
// make object for calculating probabilities and mean
CMultiWalleniusNCHypergeometric wmnc(n, osource, oweights, colors2);
wmnc.mean(var); // calculate mean
// calculate approximate variance from mean
for (i=0; i<colors; i++) {
r1 = var[i] * (osource[i]-var[i]);
r2 = (n-var[i])*(var[i]+N-n-osource[i]);
if (r1 <= 0. || r2 <= 0.) {
var[i] = 0.;
}
else {
var[i] = N*r1*r2/((N-1)*(osource[i]*r2+(N-osource[i])*r1));
}
}
// sort again, this time by variance
for (i=0; i < colors2-1; i++) {
c = order3[i]; k = i;
w = var[c];
for (j=i+1; j < colors2; j++) {
c2 = order3[j];
if (var[c2] > w) {
w = var[c2]; k = j;
}
}
order3[i] = order3[k]; order3[k] = c;
}
// number of scans (this value of nHastings has not been fine-tuned)
nHastings = 4;
if (accuracy < 1E-6) nHastings = 6;
if (colors2 > 5) nHastings++;
// Metropolis-Hastings sampler
f0 = -1.;
for (k = 0; k < nHastings; k++) {
for (i = 0; i < colors2; i++) {
j = i+1;
if (j >= colors2) j = 0;
c1 = order3[i]; c2 = order3[j];
w = oweights[c1] / oweights[c2];
n1 = osample[c1] + osample[c2];
x0 = osample[c1];
x = WalleniusNCHyp(n1, osource[c1], osource[c1]+osource[c2], w);
if (x == x0) continue; // accepted
if (f0 < 0.) f0 = wmnc.probability(osample);
CWalleniusNCHypergeometric nc(n1, osource[c1], osource[c1]+osource[c2], w, accuracy);
g0 = nc.probability(x0);
g1 = nc.probability(x);
osample[c1] = x;
osample[c2] = n1 - x;
f1 = wmnc.probability(osample);
g0 = f1 * g0; g1 = f0 * g1;
if (g0 >= g1 || g0 > g1 * Random()) {
// new state accepted
f0 = -1.;
}
else {
// rejected. restore old sample
osample[c1] = x0;
osample[c2] = n1 - x0;
}
}
}
}
}
// finished sampling by either method
// un-sort sample into destination and untangle re-orderings
for (i=0; i < colors; i++) {
c1 = order1[i]; c2 = order2[i];
if (source[c1] == osource[c2]) {
destination[c1] = osample[c2];
}
else {
// split colors with same weight that have been treated as one
x = Hypergeometric(osample[c2], source[c1], osource[c2]);
destination[c1] = x;
osample[c2] -= x;
osource[c2] -= source[c1];
}
}
}
/******************************************************************************
Multivariate complementary Wallenius noncentral hypergeometric distribution
******************************************************************************/
void StochasticLib3::MultiComplWalleniusNCHyp (
int32_t * destination, int32_t * source, double * weights, int32_t n, int colors) {
// This function generates a vector of random variables with the multivariate
// complementary Wallenius noncentral hypergeometric distribution.
// See MultiWalleniusNCHyp for details.
double rweights[MAXCOLORS]; // reciprocal weights
int32_t sample[MAXCOLORS]; // balls sampled
double w; // weight
int32_t N; // total number of balls
int i; // color index
// make reciprocal weights and calculate N
for (i=0, N=0; i<colors; i++) {
w = weights[i];
if (w == 0) FatalError("Zero weight in function MultiComplWalleniusNCHyp");
rweights[i] = 1. / w;
N += source[i];
}
// use multivariate Wallenius noncentral hypergeometric distribution
MultiWalleniusNCHyp(sample, source, rweights, N - n, colors);
// complementary distribution = balls not taken
for (i=0; i<colors; i++) {
destination[i] = source[i] - sample[i];
}
}
/******************************************************************************
Fisher's noncentral hypergeometric distribution
******************************************************************************/
int32_t StochasticLib3::FishersNCHyp (int32_t n, int32_t m, int32_t N, double odds) {
/*
This function generates a random variate with Fisher's noncentral
hypergeometric distribution.
This distribution resembles Wallenius noncentral hypergeometric distribution
and the two distributions are sometimes confused. A more detailed
explanation of this distribution is given below under the multivariate
Fisher's noncentral hypergeometric distribution (MultiFishersNCHyp).
For further documentation see nchyp.pdf, awailable from www.agner.org/random
This function uses inversion by chop-down search from zero when parameters
are small, and the ratio-of-uniforms rejection method when the former
method would be too slow or would give overflow.
*/
int32_t fak, addd; // used for undoing transformations
int32_t x; // result
// check if parameters are valid
if (n > N || m > N || n < 0 || m < 0 || odds <= 0.) {
if (odds == 0.) {
if (n > N-m) FatalError("Not enough items with nonzero weight in function FishersNCHyp");
return 0;
}
FatalError("Parameter out of range in function FishersNCHyp");}
if (odds == 1.) {
// use hypergeometric function if odds == 1
return Hypergeometric(n, m, N);
}
// symmetry transformations
fak = 1; addd = 0;
if (m > N/2) {
// invert m
m = N - m;
fak = -1; addd = n;
}
if (n > N/2) {
// invert n
n = N - n;
addd += fak * m; fak = - fak;
}
if (n > m) {
// swap n and m
x = n; n = m; m = x;
}
// cases with only one possible result end here
if (n == 0 || odds == 0.) return addd;
if (fak == -1) {
// reciprocal odds if inverting
odds = 1. / odds;
}
// choose method
if (n < 30 && N < 1024 && odds > 1.E-5 && odds < 1.E5) {
// use inversion by chop down method
x = FishersNCHypInversion (n, m, N, odds);
}
else {
// use ratio-of-uniforms method
x = FishersNCHypRatioOfUnifoms (n, m, N, odds);
}
// undo symmetry transformations
return x * fak + addd;
}
/***********************************************************************
Subfunctions used by FishersNCHyp
***********************************************************************/
int32_t StochasticLib3::FishersNCHypInversion (int32_t n, int32_t m, int32_t N, double odds) {
/*
Subfunction for FishersNCHyp distribution.
Implements Fisher's noncentral hypergeometric distribution by inversion
method, using chop-down search starting at zero.
Valid only for 0 <= n <= m <= N/2.
Without overflow check the parameters must be limited to n < 30, N < 1024,
and 1.E-5 < odds < 1.E5. This limitation is acceptable because this method
is slow for higher n.
The execution time of this function grows with n.
See the file nchyp.pdf for theoretical explanation.
*/
int32_t x; // x value
int32_t L; // derived parameter
double f; // scaled function value
double sum; // scaled sum of function values
double a1, a2, b1, b2, f1, f2; // factors in recursive calculation
double u; // uniform random variate
L = N - m - n;
if (n != fnc_n_last || m != fnc_m_last || N != fnc_N_last || odds != fnc_o_last) {
// parameters have changed. set-up
fnc_n_last = n; fnc_m_last = m; fnc_N_last = N; fnc_o_last = odds;
// f(0) is set to an arbitrary value because it cancels out.
// A low value is chosen to avoid overflow.
fnc_f0 = 1.E-100;
// calculate summation of e(x), using the formula:
// f(x) = f(x-1) * (m-x+1)*(n-x+1)*odds / (x*(L+x))
// All divisions are avoided by scaling the parameters
sum = f = fnc_f0; fnc_scale = 1.;
a1 = m; a2 = n; b1 = 1; b2 = L + 1;
for (x = 1; x <= n; x++) {
f1 = a1 * a2 * odds;
f2 = b1 * b2;
a1--; a2--; b1++; b2++;
f *= f1;
sum *= f2;
fnc_scale *= f2;
sum += f;
// overflow check. not needed if parameters are limited:
// if (sum > 1E100) {sum *= 1E-100; f *= 1E-100; fnc_scale *= 1E-100;}
}
fnc_f0 *= fnc_scale;
fnc_scale = sum;
// now f(0) = fnc_f0 / fnc_scale.
// We are still avoiding all divisions by saving the scale factor
}
// uniform random
u = Random() * fnc_scale;
// recursive calculation:
// f(x) = f(x-1) * (m-x+1)*(n-x+1)*odds / (x*(L+x))
f = fnc_f0; x = 0; a1 = m; a2 = n; b1 = 0; b2 = L;
do {
u -= f;
if (u <= 0) break;
x++; b1++; b2++;
f *= a1 * a2 * odds;
u *= b1 * b2;
// overflow check. not needed if parameters are limited:
// if (u > 1.E100) {u *= 1E-100; f *= 1E-100;}
a1--; a2--;
}
while (x < n);
return x;
}
int32_t StochasticLib3::FishersNCHypRatioOfUnifoms (int32_t n, int32_t m, int32_t N, double odds) {
/*
Subfunction for FishersNCHyp distribution.
Valid for 0 <= n <= m <= N/2, odds != 1
Fisher's noncentral hypergeometric distribution by ratio-of-uniforms
rejection method.
The execution time of this function is almost independent of the parameters.
*/
int32_t L; // N-m-n
int32_t mode; // mode
double mean; // mean
double variance; // variance
double x; // real sample
int32_t k; // integer sample
double u; // uniform random
double lf; // ln(f(x))
double AA, BB, g1, g2; // temporary
L = N - m - n;
if (n != fnc_n_last || m != fnc_m_last || N != fnc_N_last || odds != fnc_o_last) {
// parameters have changed. set-up
fnc_n_last = n; fnc_m_last = m; fnc_N_last = N; fnc_o_last = odds;
// find approximate mean
AA = (m+n)*odds+L; BB = sqrt(AA*AA - 4*odds*(odds-1)*m*n);
mean = (AA-BB)/(2*(odds-1));
// find approximate variance
AA = mean * (m-mean); BB = (n-mean)*(mean+L);
variance = N*AA*BB/((N-1)*(m*BB+(n+L)*AA));
// compute log(odds)
fnc_logb = log(odds);
// find center and width of hat function
fnc_a = mean + 0.5;
fnc_h = 1.028 + 1.717*sqrt(variance+0.5) + 0.032*fabs(fnc_logb);
// find safety bound
fnc_bound = (int32_t)(mean + 4.0 * fnc_h);
if (fnc_bound > n) fnc_bound = n;
// find mode
mode = (int32_t)(mean);
g1 =(double)(m-mode)*(n-mode)*odds;
g2 =(double)(mode+1)*(L+mode+1);
if (g1 > g2 && mode < n) mode++;
// value at mode to scale with:
fnc_lfm = mode * fnc_logb - fc_lnpk(mode, L, m, n);
}
while(1) {
u = Random();
if (u == 0) continue; // avoid divide by 0
x = fnc_a + fnc_h * (Random()-0.5)/u;