Skip to content

Commit

Permalink
Add reservoir sampling (close #18)
Browse files Browse the repository at this point in the history
  • Loading branch information
eush77 committed Jun 7, 2014
1 parent 0b8a4df commit 3f7b430
Show file tree
Hide file tree
Showing 3 changed files with 104 additions and 1 deletion.
47 changes: 47 additions & 0 deletions algorithms/math/reservoir_sampling.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
* Copyright (C) 2014 Eugene Sharygin
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
'use strict';


/**
 * Sample random elements from the array using the reservoir algorithm
 * (Algorithm R).
 *
 * The reservoir starts as a copy of the first `sampleSize` elements; every
 * later element at index `i` then replaces a random reservoir slot with
 * probability `sampleSize / (i + 1)`. The input array is never modified.
 *
 * @param {Array} array Source elements.
 * @param {number} sampleSize Number of elements to draw; a non-negative
 *     integer not greater than `array.length`.
 * @return {Array} New array holding `sampleSize` elements of `array`.
 * @throws {RangeError} If `sampleSize` is not a non-negative integer.
 * @throws {Error} If `sampleSize` exceeds `array.length`.
 */
var reservoirSampling = function (array, sampleSize) {
  // Reject negative, fractional, NaN and non-numeric sizes up front: a
  // negative size would make `slice(0, sampleSize)` count from the end of the
  // array, and a fractional one would index `array` with non-integer keys.
  if (typeof sampleSize !== 'number' || sampleSize < 0 ||
      Math.floor(sampleSize) !== sampleSize) {
    throw new RangeError('Sample size must be a non-negative integer.');
  }
  if (sampleSize > array.length) {
    throw new Error('Sample size exceeds the total number of elements.');
  }

  var reservoir = array.slice(0, sampleSize);
  for (var i = sampleSize; i < array.length; ++i) {
    // Pick j from [0, i]; only picks that land inside the reservoir
    // (j < sampleSize) cause a replacement.
    var j = Math.floor(Math.random() * (i + 1));
    if (j < sampleSize) {
      reservoir[j] = array[i];
    }
  }
  return reservoir;
};


module.exports = reservoirSampling;
3 changes: 2 additions & 1 deletion main.js
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,8 @@ var lib = {
fisherYates: require('./algorithms/math/fisher_yates'),
gcd: require('./algorithms/math/gcd'),
extendedEuclidean: require('./algorithms/math/extended_euclidean'),
newtonSqrt: require('./algorithms/math/newton_sqrt')
newtonSqrt: require('./algorithms/math/newton_sqrt'),
reservoirSampling: require('./algorithms/math/reservoir_sampling'),
},
Search: {
bfs: require('./algorithms/searching/bfs'),
Expand Down
55 changes: 55 additions & 0 deletions test/algorithms/math/reservoir_sampling.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/**
* Copyright (C) 2014 Eugene Sharygin
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
'use strict';


var reservoirSampling = require('../../../algorithms/math/reservoir_sampling'),
assert = require('assert');


// Test suite for reservoirSampling: sample shape, corner cases and the
// oversize-request error.
describe('Reservoir Sampling', function () {
  // Fixture of distinct values, so that distinctness of a sample can be
  // checked by value.
  var array = [1, 2, 3, 4, 5, 6, 7, 8, 9];

  it('should sample K distinct values from the array', function () {
    var sample = reservoirSampling(array, 5);
    assert.equal(sample.length, 5);
    var seen = {};
    // Inspect the sample itself (not the fixture): every sampled value must
    // be unique and must come from the source array.
    sample.forEach(function (value) {
      assert(!seen[value]);
      assert(array.indexOf(value) >= 0);
      seen[value] = true;
    });
  });

  it('should work in corner cases', function () {
    assert.deepEqual(reservoirSampling(array, 0), []);
    assert.deepEqual(reservoirSampling([], 0), []);
    var fullSample = reservoirSampling(array, array.length);
    // Numeric comparator: the default sort compares values as strings, which
    // would misorder the sample as soon as the fixture held multi-digit
    // numbers.
    fullSample.sort(function (a, b) {
      return a - b;
    });
    assert.deepEqual(fullSample, array);
  });

  it('should raise an error if asked for too many elements', function () {
    assert.throws(function () {
      reservoirSampling(array, array.length + 1);
    });
  });
});

0 comments on commit 3f7b430

Please sign in to comment.