Skip to content
Browse files

Tracks min/max across merges and adds clj/java transform fns

  • Loading branch information...
1 parent c98ca55 commit 0f0b29bb7031a0a8ae68fc6093573238dfd6ad60 @ashenfad ashenfad committed May 10, 2012
Showing with 118 additions and 7 deletions.
  1. +1 −1 project.clj
  2. +31 −4 src/clj/histogram/core.clj
  3. +68 −1 src/java/com/bigml/histogram/Histogram.java
  4. +18 −1 test/histogram/test/core.clj
View
2 project.clj
@@ -1,4 +1,4 @@
-(defproject histogram "1.9.2"
+(defproject histogram "1.9.3"
:description "Dynamic/streaming histograms"
:source-path "src/clj"
:java-source-path "src/java"
View
35 src/clj/histogram/core.clj
@@ -236,10 +236,7 @@
"Returns the bins contained in the histogram. A missing bin (mean is
nil) is included if it's non-empty."
[^Histogram hist]
- (let [bins (map scrub-bin (.getBins hist))]
- (if (pos? (.getMissingCount hist))
- (conj bins (missing-bin hist))
- bins)))
+ (map scrub-bin (.getBins hist)))
(defn minimum
"Returns the minimum value inserted into the histogram."
@@ -266,3 +263,33 @@
:max (+ l-mean (* 1.1 (- l-mean (:mean (last (drop-last bins))))))}
{:min f-mean
:max l-mean})))))
+
+(defn hist-to-clj
+  "Converts a Histogram object into a plain Clojure map describing it.
+  Entries whose value is nil are left out of the resulting map, and a
+  :missing-bin entry is only produced when the histogram's missing
+  count is positive."
+  [^Histogram hist]
+  (let [missing (when (pos? (.getMissingCount hist))
+                  (missing-bin hist))
+        fields {:max-bins (.getMaxBins hist)
+                :gap-weighted? (.isCountWeightedGaps hist)
+                :group-types (seq (.getGroupTypes hist))
+                :categories (seq (.getTargetCategories hist))
+                :bins (bins hist)
+                :missing-bin missing
+                :minimum (minimum hist)
+                :maximum (maximum hist)}]
+    ;; Keep false values (e.g. :gap-weighted? false); drop only nils.
+    (into {} (for [[k v] fields
+                   :when (not (nil? v))]
+               [k v]))))
+
+(defn clj-to-hist
+  "Rebuilds a Histogram object from a Clojure map representation of a
+  histogram (keys :max-bins, :gap-weighted?, :group-types,
+  :categories, :bins, :missing-bin, :minimum and :maximum)."
+  [hist-map]
+  (let [{:keys [max-bins gap-weighted? group-types categories bins
+                missing-bin maximum minimum]} hist-map
+        rebuilt (create :bins max-bins :gap-weighted? gap-weighted?
+                        :group-types group-types :categories categories)]
+    ;; Re-insert the stored bins first ...
+    (doseq [stored-bin bins]
+      (insert-bin! rebuilt stored-bin))
+    ;; ... then apply the recorded extremes, when present ...
+    (when minimum
+      (doto rebuilt (.setMinimum minimum)))
+    (when maximum
+      (doto rebuilt (.setMaximum maximum)))
+    ;; ... and finally the missing bin, if the map carries one.
+    (when missing-bin
+      (insert-bin! rebuilt missing-bin))
+    rebuilt))
View
69 src/java/com/bigml/histogram/Histogram.java
@@ -2,6 +2,8 @@
import java.text.DecimalFormat;
import java.util.ArrayList;
+import java.util.List;
+import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map.Entry;
@@ -218,7 +220,6 @@ public TargetType getTargetType() {
return _targetType;
}
-
/**
* Returns the target types for a group histogram
*/
@@ -227,6 +228,36 @@ public TargetType getTargetType() {
}
/**
+ * Gets the upper limit on the number of bins this histogram
+ * is allowed to hold.
+ *
+ * @return the maximum number of allowed bins
+ */
+ public int getMaxBins() {
+   return this._maxBins;
+ }
+
+ /**
+ * Reports whether this histogram uses count weighted gaps.
+ *
+ * @return <code>true</code> if gaps are count weighted
+ */
+ public boolean isCountWeightedGaps() {
+   return this._countWeightedGaps;
+ }
+
+ /**
+ * Lists the categories of an array-backed categorical
+ * histogram, each placed at the position given by its
+ * index in the category index map.
+ *
+ * @return the categories as a fixed-size list, or
+ *         <code>null</code> when there is no index map
+ */
+ public List<Object> getTargetCategories() {
+   if (_indexMap == null) {
+     return null;
+   }
+   // Invert the category -> index mapping into an index-ordered array.
+   Object[] ordered = new Object[_indexMap.size()];
+   for (Entry<Object, Integer> mapping : _indexMap.entrySet()) {
+     ordered[mapping.getValue()] = mapping.getKey();
+   }
+   return Arrays.asList(ordered);
+ }
+
+ /**
* Returns the approximate number of points less than
* <code>p</code>.
*
@@ -472,6 +503,18 @@ public Histogram merge(Histogram<T> histogram) throws MixedInsertException {
mergeBins();
}
+ if (_minimum == null) {
+ _minimum = histogram.getMinimum();
+ } else if (histogram.getMinimum() != null){
+ _minimum = Math.min(_minimum, histogram.getMinimum());
+ }
+
+ if (_maximum == null) {
+ _maximum = histogram.getMaximum();
+ } else if (histogram.getMaximum() != null){
+ _maximum = Math.max(_maximum, histogram.getMaximum());
+ }
+
if (_missingTarget == null) {
_missingTarget = (T) histogram.getMissingTarget();
} else {
@@ -567,6 +610,30 @@ public Double getMaximum() {
return _maximum;
}
+ /**
+ * Overrides the smallest input value recorded for this
+ * histogram. Intended only for histograms that were built
+ * by inserting pre-existing bins.
+ *
+ * @param minimum the minimum value observed by the histogram
+ * @return this histogram
+ */
+ public Histogram setMinimum(Double minimum) {
+   this._minimum = minimum;
+   return this;
+ }
+
+ /**
+ * Overrides the largest input value recorded for this
+ * histogram. Intended only for histograms that were built
+ * by inserting pre-existing bins.
+ *
+ * @param maximum the maximum value observed by the histogram
+ * @return this histogram
+ */
+ public Histogram setMaximum(Double maximum) {
+   this._maximum = maximum;
+   return this;
+ }
+
private void checkType(TargetType newType) throws MixedInsertException {
if (_targetType == null) {
_targetType = newType;
View
19 test/histogram/test/core.clj
@@ -238,4 +238,21 @@
(create)
(repeatedly 1000 #(rand-int 10)))]
(is (== 0 (minimum hist)))
- (is (== 9 (maximum hist)))))
+ (is (== 9 (maximum hist))))
+ (let [hist1 (reduce insert! (create) (range 0 4))
+ hist2 (reduce insert! (create) (range 2 6))
+ merged (-> (create) (merge! hist1) (merge! hist2))]
+ (is (== 0 (minimum merged)))
+ (is (== 5 (maximum merged)))))
+
+(deftest transform-test
+  ;; Round-trips a categorical histogram (including a missing-value
+  ;; insert) through hist-to-clj / clj-to-hist and checks that bins,
+  ;; missing bin, minimum and maximum all survive the conversion.
+  (let [add-point (fn [h [x y]] (insert! h x y))
+        populated (reduce add-point
+                          (create :bins 8 :gap-weighted? true
+                                  :categories [:apple :orange :grape])
+                          (cat-data 1000 false))
+        original (insert! populated nil :apple)
+        restored (-> original hist-to-clj clj-to-hist)]
+    (is (= (bins original) (bins restored)))
+    (is (= (missing-bin original) (missing-bin restored)))
+    (is (= (minimum original) (minimum restored)))
+    (is (= (maximum original) (maximum restored)))))

0 comments on commit 0f0b29b

Please sign in to comment.
Something went wrong with that request. Please try again.