Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

reservoir sampling

  • Loading branch information...
commit f110dd4d4463d64e6ce23ceb3ce6516f557471cf 1 parent 3c8721f
@stuarthalloway stuarthalloway authored
View
14 src/main/clojure/clojure/data/generators.clj
@@ -282,6 +282,20 @@ instance you can get a repeatable basis for tests."
;; we'll get the same shuffle, given the same *rnd*.
(fisher-yates coll))
+(defn reservoir-sample
+  "Reservoir sample ct items from coll, using *rnd*.
+
+  Returns a vector holding at most ct items of coll (fewer when coll has
+  fewer than ct items). The first ct items fill the reservoir; each later
+  item then replaces a randomly chosen slot with probability
+  ct / (number of items seen so far, including itself), which is what makes
+  every item equally likely to end up in the result.
+
+  coll is walked once from start to end, so it must be finite."
+  [ct coll]
+  (loop [result (transient (core/vec (take ct coll)))
+         ;; seen = how many items of coll have been consumed so far,
+         ;; counting the ct items used to fill the reservoir.
+         seen ct
+         coll (drop ct coll)]
+    (if (seq coll)
+      ;; The incoming item is item number (inc seen), so pos has to range
+      ;; over the (inc seen) values 0..seen; the item is kept only when pos
+      ;; falls on one of the ct reservoir slots.
+      ;; NOTE(review): this assumes uniform excludes its upper bound --
+      ;; confirm against uniform's docstring. Under that assumption the
+      ;; previous (uniform 0 seen) always kept the first item after the
+      ;; reservoir filled, biasing the sample.
+      (let [pos (uniform 0 (inc seen))]
+        (recur (if (< pos ct)
+                 (assoc! result pos (first coll))
+                 result)
+               (inc seen)
+               (rest coll)))
+      (persistent! result))))
View
10 src/test/clojure/clojure/data/generators_test.clj
@@ -9,3 +9,13 @@
shuf (gen/shuffle coll)]
(is (= (into #{} coll)
(into #{} shuf))))))
+
+(deftest test-reservoir-sample-consistency
+  ;; For each of 50 seeds, sampling twice with identically seeded generators
+  ;; must give the same result, and that result must be a well-formed
+  ;; sample: exactly 10 items, all drawn from the input collection.
+  ;; (deftest takes no argument vector, so none is written here.)
+  (dotimes [n 50]
+    (let [coll (range 100)
+          members (set coll)
+          sample-1 (binding [gen/*rnd* (java.util.Random. n)]
+                     (gen/reservoir-sample 10 coll))
+          sample-2 (binding [gen/*rnd* (java.util.Random. n)]
+                     (gen/reservoir-sample 10 coll))]
+      (is (= sample-1 sample-2))
+      (is (= 10 (count sample-1)))
+      ;; contains? rather than calling the set directly, so membership does
+      ;; not depend on the truthiness of the element itself.
+      (is (every? #(contains? members %) sample-1)))))
Please sign in to comment.
Something went wrong with that request. Please try again.