Permalink
Browse files

Added scalar-abs-value function to improve performance of various stats functions
  • Loading branch information...
1 parent 53daef3 commit bd6dcc4f534a80089681d1f200a9a12eb92fd9c7 David Williams committed Mar 17, 2012
Showing with 19 additions and 20 deletions.
  1. +17 −18 modules/incanter-core/src/incanter/stats.clj
  2. +2 −2 modules/incanter-core/test/incanter/stats_tests.clj
@@ -43,6 +43,13 @@
matrix length log10 sum sum-of-squares sel matrix?
cumulative-sum solve vectorize bind-rows)]))
+(defn scalar-abs
+ "Fast absolute value of a single number x (compares x to 0 with <, so not for collections); negates via *' so Long/MIN_VALUE promotes instead of overflowing."
+ [x]
+ (if (< x 0)
+ (*' -1 x)
+ x))
+
(defn- deep-merge-with
"Copied here from clojure.contrib.map-utils. The original may have
been a casualty of the clojure.contrib cataclysm.
@@ -2089,7 +2096,7 @@
coef-var (mult mse xtxi)
std-errors (sqrt (diag coef-var))
t-tests (div coefs std-errors)
- t-probs (mult 2 (cdf-t (abs t-tests) :df df2 :lower-tail false))
+ t-probs (mult 2 (cdf-t (scalar-abs t-tests) :df df2 :lower-tail false))
t-95 (mult (quantile-t 0.975 :df df2) std-errors)
coefs-ci (if (number? std-errors)
[(plus coefs t-95)
@@ -2232,7 +2239,7 @@
[coll mu]
(* 2
(cdf-t
- (- (abs (simple-t-test coll mu)))
+ (- (scalar-abs (simple-t-test coll mu)))
:df (dec (count coll)))))
(defn simple-ci
@@ -2503,7 +2510,7 @@ Test for different variances between 2 samples
(/ (* (c-margins c) (r-margins r)) N))
(mult N probs))
X-sq (if (and correct (and (= (count r-levels) 2) (= (count c-levels) 2)))
- (reduce + (map (fn [o e] (/ (pow (- (abs (- o e)) 0.5) 2) e)) counts E))
+ (reduce + (map (fn [o e] (/ (pow (- (scalar-abs (- o e)) 0.5) 2) e)) counts E))
(reduce + (map (fn [o e] (/ (pow (- o e) 2) e)) counts E)))
]
{:X-sq X-sq
@@ -2763,7 +2770,7 @@ Test for different variances between 2 samples
y is within z of x in metric space.
"
[z x y]
- (< (abs (- x y)) z))
+ (< (scalar-abs (- x y)) z))
(defn square-devs-from-mean
"takes either a sample or a sample and a precalculated mean.
@@ -3076,13 +3083,6 @@ Legendre[2] discusses a variant of the W statistic which accommodates ties in th
;;TODO: add graphical approaches to similarity: http://en.wikipedia.org/wiki/SimRank
;;TODO: string similarity measures: http://en.wikipedia.org/wiki/String_metric
-(defn fast-abs
- "Fast absolute value function"
- [x]
- (if (< x 0)
- (*' -1 x)
- x))
-
(defn minkowski-distance
"http://en.wikipedia.org/wiki/Minkowski_distance
http://en.wikipedia.org/wiki/Lp_space
@@ -3098,7 +3098,7 @@ In the limiting case of p reaching infinity we obtain the Chebyshev distance."
(reduce +
(map
#(pow
- (fast-abs
+ (scalar-abs
(pow (- %1 %2) p)))
a b))
(/ 1 p)))
@@ -3114,11 +3114,10 @@ the Euclidean distance or Euclidean metric is the ordinary distance between two
"In the limiting case of Lp reaching infinity we obtain the Chebyshev distance."
[a b]
{:pre [(= (count a) (count b))]}
-(apply
- tree-comp-each
- max
- (fn [[x y]] (abs (- x y)))
- (map vector a b)))
+(reduce max
+ (map
+ #(scalar-abs (- %1 %2))
+ a b)))
(defn manhattan-distance
"http://en.wikipedia.org/wiki/Manhattan_distance
@@ -3292,7 +3291,7 @@ The metric space induced by the Lee distance is a discrete analog of the ellipti
tree-comp-each
+
(fn [x]
- (let [diff (abs (apply - (map int x)))]
+ (let [diff (scalar-abs (apply - (map int x)))]
(min diff (- q diff))))
(map vector a b)))))
@@ -235,10 +235,10 @@
(is (= 1 (damerau-levenshtein-distance b c)))
(is (= 3 (damerau-levenshtein-distance a c)))))
-(deftest fast-abs-test
+(deftest scalar-abs-test
(is
(= 9223372036854775808
- (fast-abs -9223372036854775808))))
+ (scalar-abs -9223372036854775808))))
(deftest euclid
(is

0 comments on commit bd6dcc4

Please sign in to comment.