Skip to content
Browse files

Adds sample.stream/multi-reduce and refactors tests

  • Loading branch information...
1 parent fa95714 commit ccb74f78c68a4a14eb31238a7345239b6bf6958f @ashenfad ashenfad committed Jul 30, 2012
Showing with 147 additions and 73 deletions.
  1. +1 −0 .gitignore
  2. +25 −6 README.md
  3. +35 −14 src/sample/stream.clj
  4. +5 −53 test/sample/test/core.clj
  5. +30 −0 test/sample/test/reservoir.clj
  6. +38 −0 test/sample/test/stream.clj
  7. +13 −0 test/sample/test/util.clj
View
1 .gitignore
@@ -2,5 +2,6 @@ pom.xml
*jar
/lib/
/classes/
+/target/
.lein-deps-sum
.lein-failures
View
31 README.md
@@ -168,9 +168,9 @@ test> (take 10 (stream/sample (range) 1 1000 :rate true))
(1149 1391 1562 3960 4359 4455 5141 5885 6310 7568 7828)
```
-### Multi-Sampling
+### Multi-Sample
-The `stream/multi-sample!` fn can be used to generate multiple
+The `stream/multi-sample` fn can be used to generate multiple
samplings in one pass over the population. The fn takes the
population followed by sets of sampling parameters, one for each
desired sampling.
@@ -179,7 +179,7 @@ Each set of sample parameters should be composed of a consumer fn, the
sample size, the population size, and optionally the parameters
`:replace`, `:seed`, and `:rate`.
-`multi-sample!` will generate a unique sampling for every parameter
+`multi-sample` will generate a unique sampling for every parameter
set. Whenever a value is sampled, it will be consumed by the
parameter set's consumer fn. A consumer fn should accept a single
parameter.
@@ -196,9 +196,9 @@ test> (defn award-gift-certificate! [customer-id]
test> (defn award-hawaiian-vacation! [customer-id]
(println "Customer" customer-id "wins a Hawaiian vacation."))
test> (def customer-ids (range 1000))
-test> (stream/multi-sample! customer-ids
- [award-gift-certificate! 1 100 :rate true]
- [award-hawaiian-vacation! 1 500 :rate true])
+test> (stream/multi-sample customer-ids
+ [award-gift-certificate! 1 100 :rate true]
+ [award-hawaiian-vacation! 1 500 :rate true])
Customer 161 wins a Hawaiian vacation.
Customer 427 wins a gift certificate.
Customer 627 wins a gift certificate.
@@ -211,3 +211,22 @@ Customer 794 wins a gift certificate.
Customer 833 wins a Hawaiian vacation.
Customer 836 wins a gift certificate.
```
+
+### Multi-Reduce
+
+`multi-reduce` is very similar to `multi-sample`, except every set of
+sample parameters defines a sampling along with a reduction function.
+So each set of sample parameters should be composed of a reduce fn, an
+initial reduce value, the sample size, the population size, and
+optionally the `:replace`, `:seed`, and `:rate` parameters.
+
+`multi-reduce` will return a seq of values, each value being the final
+reduction for a sampling. A reducer fn should accept two parameters.
+
+An example:
+
+```clojure
+test> (stream/multi-reduce (range) [+ 0 20 30 :seed 3]
+ [+ 0 20 30 :seed 4])
+(269 291)
+```
View
49 src/sample/stream.clj
@@ -77,8 +77,13 @@
(map (apply create sample-size pop-size opts)
coll))))
-(defn multi-sample!
- "multi-sample! expects a collection followed by one or more sets of
+(defn- multi-stream [coll opts-list]
+ (take-while #(some identity %)
+ (map (apply juxt (map #(apply create %) opts-list))
+ coll)))
+
+(defn multi-sample
+ "multi-sample expects a collection followed by one or more sets of
sample parameters, each defining a unique sampling of the
population.
@@ -87,7 +92,7 @@
':seed', and ':rate' parameters. See the documentation for
'sample' for more about the parameters.
- multi-sample! will create a unique set of samples for every
+ multi-sample will create a unique set of samples for every
parameter set. Whenever a value is sampled, it will be consumed by
the parameter set's consumer fn. A consumer fn should accept a
single parameter.
@@ -96,14 +101,30 @@
[#(println :bar %) 4 5 :replace true])"
[coll & opts-list]
(when (seq opts-list)
- (let [consumers (map first opts-list)
- stream (take-while #(some identity %)
- (map (apply juxt (map #(apply create %)
- (map next opts-list)))
- coll))]
- (doseq [samples stream]
- (doall (map (fn [consumer vals]
- (doseq [v vals]
- (consumer v)))
- consumers
- samples))))))
+ (let [consumers (map first opts-list)]
+ (doseq [samples (multi-stream coll (map next opts-list))]
+ (dorun (map (comp dorun map) consumers samples))))))
+
+(defn multi-reduce
+ "multi-reduce expects a collection followed by one or more sets of
+ sample parameters, each defining a unique sampling of the
+ population.
+
+ Each set of sample parameters should be composed of a reduce fn, an
+ initial reduce value, the sample size, the population size, and
+ optionally the ':replace', ':seed', and ':rate' parameters. See
+ the documentation for 'sample' for more about the parameters.
+
+ multi-reduce will create a reduction over the unique set of samples
+ for every parameter set. Whenever a value is sampled, it will be
+ reduced by the parameter set's reducer fn. A reducer fn should
+ accept two parameters.
+
+ Example: (multi-reduce (range) [+ 0 2 5]
+ [- 100 4 5 :replace true])"
+ [coll & opts-list]
+ (when (seq opts-list)
+ (let [reducers (map first opts-list)]
+ (reduce #(doall (map reduce reducers %1 %2))
+ (map second opts-list)
+ (multi-stream coll (map #(drop 2 %) opts-list))))))
View
58 test/sample/test/core.clj
@@ -5,70 +5,22 @@
;; Start date: Jun 27, 2012
(ns sample.test.core
- (:use clojure.test)
- (:require (sample [core :as core]
- [reservoir :as reservoir]
- [stream :as stream])))
+ (:use clojure.test
+ sample.test.util)
+ (:require (sample [core :as core])))
-(defn- about-eq
- "Returns true if the absolute value of the difference
- between the first two arguments is less than the third."
- [v1 v2 tol]
- (< (Math/abs (double (- v1 v2))) tol))
-
-(deftest simple-sample
+(deftest sample
(is (about-eq (reduce + (take 500 (core/sample (range 1000))))
250000 25000))
(is (about-eq (reduce + (take 500 (core/sample (range 1000) :replace true)))
250000 25000))
(let [[v1 v2] (vals (frequencies (take 1000 (core/sample [0 1] :replace true))))]
(is (about-eq v1 v2 150))))
-(deftest reservoir-sample
- (is (about-eq (reduce + (reservoir/sample (range 1000) 500))
- 250000 25000))
- (is (about-eq (reduce + (reservoir/sample (range 1000) 500 :replace true))
- 250000 25000))
- (is (= (reservoir/sample (range 20) 10 :seed 7)
- (reduce reservoir/insert
- (reservoir/create 10 :seed 7)
- (range 20))))
- (is (= (reservoir/sample (range 20) 10 :seed 7 :replace true)
- (reduce reservoir/insert
- (reservoir/create 10 :seed 7 :replace true)
- (range 20)))))
-
-(deftest stream-sample
- (is (about-eq (reduce + (stream/sample (range 1000) 500 1000))
- 250000 25000))
- (is (about-eq (reduce + (stream/sample (range 1000) 500 1000 :replace true))
- 250000 25000))
- (is (about-eq (reduce + (stream/sample (range 1000) 500 1000
- :replace true
- :rate true))
- 250000 35000)))
-
(deftest regression
(is (= (take 10 (core/sample (range 20) :seed :foo))
'(7 3 9 6 10 4 2 8 5 13)))
(is (= (take 10 (core/sample (range 20) :seed 7))
'(16 13 17 12 9 4 18 7 14 19)))
(is (= (take 10 (core/sample (range 20) :seed 7 :replace true))
- '(16 4 5 4 0 14 8 9 10 14)))
- (is (= (reservoir/sample (range 20) 10 :seed 7)
- [9 16 11 2 8 19 17 6 15 10]))
- (is (= (reservoir/sample (range 20) 10 :seed 7 :replace true)
- [13 10 9 16 7 2 15 17 4 14]))
- (is (= (stream/sample (range 20) 10 20 :seed 7)
- '(3 4 5 7 10 12 14 15 16 17)))
- (is (= (stream/sample (range 20) 10 20 :seed 7 :replace true)
- '(2 3 7 8 12 13 13 14 17 19)))
- (is (= (stream/sample (range 20) 10 20 :seed 7 :replace true :rate true)
- '(0 1 3 4 7 9 9 10 11 16 19)))
- (let [sum1 (atom 0)
- sum2 (atom 0)]
- (stream/multi-sample! (range)
- [(partial swap! sum1 +) 150 200 :seed 3]
- [(partial swap! sum2 +) 150 200 :seed 7 :replace true])
- (is (= 14557 @sum1))
- (is (= 15921 @sum2))))
+ '(16 4 5 4 0 14 8 9 10 14))))
View
30 test/sample/test/reservoir.clj
@@ -0,0 +1,30 @@
+;; Copyright (c) 2012 BigML, Inc
+;; All rights reserved.
+
+;; Author: Adam Ashenfelter <ashenfad@bigml.com>
+;; Start date: Jul 30, 2012
+
+(ns sample.test.reservoir
+ (:use clojure.test
+ sample.test.util)
+ (:require (sample [reservoir :as reservoir])))
+
+(deftest sample
+ (is (about-eq (reduce + (reservoir/sample (range 1000) 500))
+ 250000 25000))
+ (is (about-eq (reduce + (reservoir/sample (range 1000) 500 :replace true))
+ 250000 25000))
+ (is (= (reservoir/sample (range 20) 10 :seed 7)
+ (reduce reservoir/insert
+ (reservoir/create 10 :seed 7)
+ (range 20))))
+ (is (= (reservoir/sample (range 20) 10 :seed 7 :replace true)
+ (reduce reservoir/insert
+ (reservoir/create 10 :seed 7 :replace true)
+ (range 20)))))
+
+(deftest regression
+ (is (= (reservoir/sample (range 20) 10 :seed 7)
+ [9 16 11 2 8 19 17 6 15 10]))
+ (is (= (reservoir/sample (range 20) 10 :seed 7 :replace true)
+ [13 10 9 16 7 2 15 17 4 14])))
View
38 test/sample/test/stream.clj
@@ -0,0 +1,38 @@
+;; Copyright (c) 2012 BigML, Inc
+;; All rights reserved.
+
+;; Author: Adam Ashenfelter <ashenfad@bigml.com>
+;; Start date: Jul 30, 2012
+
+(ns sample.test.stream
+ (:use clojure.test
+ sample.test.util)
+ (:require (sample [stream :as stream])))
+
+(deftest sample
+ (is (about-eq (reduce + (stream/sample (range 1000) 500 1000))
+ 250000 25000))
+ (is (about-eq (reduce + (stream/sample (range 1000) 500 1000 :replace true))
+ 250000 25000))
+ (is (about-eq (reduce + (stream/sample (range 1000) 500 1000
+ :replace true
+ :rate true))
+ 250000 35000)))
+
+(deftest regression
+  ;; Seeded samplings must reproduce these exact values.
+  (is (= (stream/sample (range 20) 10 20 :seed 7)
+         '(3 4 5 7 10 12 14 15 16 17)))
+  (is (= (stream/sample (range 20) 10 20 :seed 7 :replace true)
+         '(2 3 7 8 12 13 13 14 17 19)))
+  (is (= (stream/sample (range 20) 10 20 :seed 7 :replace true :rate true)
+         '(0 1 3 4 7 9 9 10 11 16 19)))
+  ;; multi-sample (side-effecting consumers) and multi-reduce (pure
+  ;; reductions) are run with the same seeded parameter sets and must
+  ;; produce the same sums.
+  (let [sum1 (atom 0)
+        sum2 (atom 0)]
+    (stream/multi-sample (range)
+                         [(partial swap! sum1 +) 150 200 :seed 3]
+                         [(partial swap! sum2 +) 150 200 :seed 7 :replace true])
+    ;; The comparison must be wrapped in `is` to be recorded as an
+    ;; assertion, and the atoms must be dereferenced inside a vector:
+    ;; a quoted list would hold the unevaluated (deref sum1) forms.
+    (is (= [14557 15921]
+           [@sum1 @sum2]
+           (stream/multi-reduce (range)
+                                [+ 0 150 200 :seed 3]
+                                [+ 0 150 200 :seed 7 :replace true])))))
View
13 test/sample/test/util.clj
@@ -0,0 +1,13 @@
+;; Copyright (c) 2012 BigML, Inc
+;; All rights reserved.
+
+;; Author: Adam Ashenfelter <ashenfad@bigml.com>
+;; Start date: Jul 30, 2012
+
+(ns sample.test.util)
+
+(defn about-eq
+ "Returns true if the absolute value of the difference
+ between the first two arguments is less than the third."
+ [v1 v2 tol]
+ (< (Math/abs (double (- v1 v2))) tol))

0 comments on commit ccb74f7

Please sign in to comment.
Something went wrong with that request. Please try again.