From 3f7b430219d5ff7b1dc18d4bbd34861847393e7f Mon Sep 17 00:00:00 2001 From: eush77 Date: Sat, 7 Jun 2014 19:21:01 +0400 Subject: [PATCH] Add reservoir sampling (close #18) --- algorithms/math/reservoir_sampling.js | 47 ++++++++++++++++++ main.js | 3 +- test/algorithms/math/reservoir_sampling.js | 55 ++++++++++++++++++++++ 3 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 algorithms/math/reservoir_sampling.js create mode 100644 test/algorithms/math/reservoir_sampling.js diff --git a/algorithms/math/reservoir_sampling.js b/algorithms/math/reservoir_sampling.js new file mode 100644 index 0000000..1950691 --- /dev/null +++ b/algorithms/math/reservoir_sampling.js @@ -0,0 +1,47 @@ +/** + * Copyright (C) 2014 Eugene Sharygin + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ +'use strict'; + + +/** + * Sample random elements from the array using reservoir algorithm. 
/**
 * Pick `sampleSize` elements from `array` at random using reservoir sampling
 * (a single pass over the input).
 *
 * The input array is not modified. Every element has the same probability
 * (sampleSize / array.length) of ending up in the result, to the extent that
 * Math.random() is uniform. The order of the result is NOT shuffled: an
 * element that was never evicted keeps its original position, so asking for
 * all elements returns a copy in the original order.
 *
 * @param {Array} array Source elements.
 * @param {number} sampleSize Non-negative integer, at most array.length.
 * @return {Array} New array holding exactly `sampleSize` sampled elements.
 * @throws {RangeError} If `sampleSize` is not a non-negative integer.
 * @throws {Error} If `sampleSize` is greater than `array.length`.
 */
var reservoirSampling = function (array, sampleSize) {
  // Reject sizes that would otherwise be silently misinterpreted: a negative
  // value makes slice() count from the end of the array, and a fractional or
  // NaN value makes the loop below index non-integer positions.
  if (typeof sampleSize !== 'number' ||
      sampleSize < 0 ||
      Math.floor(sampleSize) !== sampleSize) {
    throw new RangeError('Sample size must be a non-negative integer.');
  }
  if (sampleSize > array.length) {
    throw new Error('Sample size exceeds the total number of elements.');
  }

  // Seed the reservoir with the first `sampleSize` elements (a copy).
  var reservoir = array.slice(0, sampleSize);

  // Every later element at index i replaces a random reservoir slot with
  // probability sampleSize / (i + 1).
  for (var i = sampleSize; i < array.length; ++i) {
    // Uniform integer in [0, i].
    var j = Math.floor(Math.random() * (i + 1));
    if (j < sampleSize) {
      reservoir[j] = array[i];
    }
  }
  return reservoir;
};
'use strict';


var reservoirSampling = require('../../../algorithms/math/reservoir_sampling'),
    assert = require('assert');


/**
 * Mocha suite for reservoirSampling: checks the size, distinctness and origin
 * of the sampled values, the empty and full-size corner cases, and the error
 * raised when more elements are requested than the array holds.
 */
describe('Reservoir Sampling', function () {
  // Fixture values are distinct, so a duplicate in a sample means one input
  // element was picked twice.
  var array = [1, 2, 3, 4, 5, 6, 7, 8, 9];

  it('should sample K distinct values from the array', function () {
    var sample = reservoirSampling(array, 5);
    assert.equal(sample.length, 5);
    var seen = {};
    // Walk the returned sample (not the fixture) so the assertions actually
    // exercise what the function produced.
    sample.forEach(function (value) {
      assert(!seen[value]);
      assert(array.indexOf(value) >= 0);
      seen[value] = true;
    });
  });

  it('should work in corner cases', function () {
    assert.deepEqual(reservoirSampling(array, 0), []);
    assert.deepEqual(reservoirSampling([], 0), []);
    var fullSample = reservoirSampling(array, array.length);
    // Numeric comparator: the default sort compares values as strings, which
    // would misorder the sample if the fixture ever held multi-digit numbers.
    fullSample.sort(function (a, b) {
      return a - b;
    });
    assert.deepEqual(fullSample, array);
  });

  it('should raise an error if asked for too many elements', function () {
    assert.throws(function () {
      reservoirSampling(array, array.length + 1);
    });
  });
});