Skip to content

Commit

Permalink
features and linear model spike with commons-math.
Browse files Browse the repository at this point in the history
  • Loading branch information
Bradford Cross committed Mar 15, 2010
0 parents commit 79962c3
Show file tree
Hide file tree
Showing 7 changed files with 191 additions and 0 deletions.
25 changes: 25 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
build
target
lib
*.dot

# use glob syntax.
syntax: glob
creds.clj
Manifest.txt
pom.xml
aws.clj
*.ser
*.class
*.jar
*~
*.bak
*.off
*.old
.DS_Store
*.#*
*#*
*.classpath
*.project
*.settings
*.pyc
9 changes: 9 additions & 0 deletions project.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
;; Build definition for the infer library.
;; NOTE(review): the defproject / :dev-dependencies shape looks like a
;; Leiningen project file — confirm the build tool before relying on that.
(defproject infer "1.0-SNAPSHOT"
:description "inference and machine learning for clojure"
;; Libraries needed at runtime. Both clojure artifacts are pinned to
;; SNAPSHOT versions, as is criterium.
:dependencies [[org.clojure/clojure "1.2.0-master-SNAPSHOT"]
[org.clojure/clojure-contrib "1.2.0-master-SNAPSHOT"]
[org.apache.commons/commons-math "2.0"]
[org.incanter/incanter-core "1.0.0"]
[criterium/criterium "0.0.1-SNAPSHOT"]]
;; Development-only tooling.
:dev-dependencies [[swank-clojure "1.1.0-SNAPSHOT"]]
;; Extra artifact repository, registered under the name "incanter".
:repositories {"incanter" "http://repo.incanter.org"})
50 changes: 50 additions & 0 deletions src/infer/features.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
(ns infer.features
(:use clojure.contrib.combinatorics)
(:use clojure.contrib.seq-utils)
(:use clojure.set))

(defn flatten-seqs
  "Walks an arbitrarily nested sequential structure (lists, vectors, lazy
  seqs, ...) and returns a seq of its innermost sequential items, i.e. the
  ones that do not themselves contain another sequential item.

  The root x itself is never part of the result, so a flat input such as
  [1 2] yields an empty seq."
  [x]
  (letfn [(nested? [coll] (some sequential? coll))]
    (->> (tree-seq nested? seq x)
         ;; the first node produced by tree-seq is the root; skip it
         rest
         (remove nested?))))

(defn feature-vectors
  "Expands a nested map into a seq of feature vectors.

  Every path of keys from the root down to a non-map leaf becomes one
  vector, and that vector is emitted as many times as the leaf value says.
  The key :missing is recorded in the vector as 0.

  [m]      - entry point; returns the flat seq of vectors.
  [vect m] - recursive step; vect is the key path accumulated so far and
             the result is still nested (one level per map level)."
  ([vect m]
     (map (fn [[k v]]
            (let [path (conj vect (if (= :missing k) 0 k))]
              (if (map? v)
                (feature-vectors path v)
                ;; leaf: v is the number of occurrences of this path
                (repeat v path))))
          m))
  ([m]
     (flatten-seqs (feature-vectors [] m))))

(defn remove-at
  "Returns a lazy seq of the items of vector v with the item at index i
  left out. v must be a vector, since subvec is used for slicing."
  [i v]
  (let [before (subvec v 0 i)
        after  (subvec v (inc i))]
    (concat before after)))

(defn nth-is?
  "Applies pred to the item at index i of coll and returns its result."
  [i pred coll]
  (-> coll
      (nth i)
      pred))

(defn count-when
  "Returns how many items of coll make pred return a truthy value."
  [pred coll]
  (reduce (fn [hits item]
            (if (pred item) (inc hits) hits))
          0
          coll))

(defn vec-but-last
  "Returns vector s without its final item, as a vector. An empty vector
  is returned unchanged in content (no exception is thrown)."
  [s]
  (let [n (count s)
        end (if (pos? n) (dec n) 0)]
    (subvec s 0 end)))

(defn extract-ys
  "Splits each vector in vs into its last item and everything before it.
  Returns a two-element vector [ys xs] where ys is the seq of last items
  and xs is the seq of the remaining leading items of each vector."
  [vs]
  (let [ys (map last vs)
        xs (map vec-but-last vs)]
    [ys xs]))

(defn double-matrix
  "Converts xs, a sequential collection of rows (each row a collection of
  numbers), into a Java double[][].

  Each row is converted with double-array, so its length follows the row
  itself. An empty xs yields a zero-length double[][]."
  [xs]
  ;; The element class is given explicitly ("[D" is double[]) so that an
  ;; empty input still produces a double[][] rather than an Object[].
  ;; Building the rows in a single pass also avoids indexing into xs with
  ;; nth, which is linear per lookup on lazy seqs.
  (into-array (Class/forName "[D")
              (map double-array xs)))
12 changes: 12 additions & 0 deletions src/infer/linear_models.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
;; Linear models built on commons-math.
;; The namespace name mirrors the file path (infer/linear_models.clj) so the
;; file can be loaded with require/use; double-matrix is pulled in from
;; infer.features, where it is defined.
(ns infer.linear-models
  (:use [infer.features :only (double-matrix)])
  (:import org.apache.commons.math.stat.regression.OLSMultipleLinearRegression))

(defn ols-linear-model
  "Creates an OLSMultipleLinearRegression, loads it with the sample data
  and returns it. ys is converted with double-array and xs with
  double-matrix before being handed to .newSampleData."
  [ys xs]
  (let [model (OLSMultipleLinearRegression.)
        y (double-array ys)
        x (double-matrix xs)]
    (.newSampleData model y x)
    model))

(defn betas
  "Returns the regression parameters (betas) of model m as a vector, as
  reported by its .estimateRegressionParameters method. m is expected to be
  an OLSMultipleLinearRegression model."
  [m]
  ;; The docstring has to come before the parameter vector to be attached
  ;; to the var; placed after it, the string is just an ignored expression.
  (into [] (.estimateRegressionParameters m)))
12 changes: 12 additions & 0 deletions src/infer/matrix.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
(ns infer.matrix
(:refer-clojure :exclude (inc))
(:use incanter.core))

(defn inc
  "Returns a matrix built from m via (matrix m) in which the cell at
  row x, column y has been increased by amount (1 when amount is omitted).
  The current cell value is read from m itself.
  Shadows clojure.core/inc inside this namespace."
  ([m x y]
     (inc m x y 1))
  ([m x y amount]
     (let [result (matrix m)
           bumped (+ (.getQuick m x y) amount)]
       (.set result x y bumped)
       result)))


72 changes: 72 additions & 0 deletions test/infer/features_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
(ns infer.features-test
(:use clojure.test)
(:use infer.features))

;; flatten-seqs returns the innermost vectors regardless of nesting depth.
(deftest flatten-seqs-test
  (are [expected nested] (= expected (flatten-seqs nested))
       [[1 2] [3 4]]
       [[[1 2]] [[[3 4]]]]

       [[0 1 1 1] [0 1 2 1] [0 1 2 1] [0 1 2 1] [0 2 1 1]]
       [[[[[0 1 1 1]] [[0 1 2 1] [0 1 2 1] [0 1 2 1]]] [[[0 2 1 1]]]]]))

;; remove-at drops exactly one item: from the middle, the front and the end.
(deftest remove-at-test
  (are [expected idx] (= expected (remove-at idx [0 1 2 3]))
       [0 1 3] 2
       [1 2 3] 0
       [0 1 2] 3))

;; vec-but-last keeps everything except the final item.
(deftest vec-but-last-test
  (is (= [1 2 3]
         (-> [1 2 3 4] vec-but-last))))

;; Four identical rows: index 2 is always 0, and every row has a positive
;; entry, so both counts below are expected to be 0.
(deftest count-when-test
  (let [rows (vec (repeat 4 [0 6 0 2 6]))
        positive? #(> % 0)]
    ;; no row has a positive value at index 2
    (is (= 0
           (count-when (partial nth-is? 2 positive?) rows)))
    ;; no row is entirely non-positive
    (is (= 0
           (count-when (fn [row] (not-any? positive? row)) rows)))))

;; feature-vectors turns nested count maps into repeated key-path vectors;
;; the last case checks that the :missing key is emitted as 0.
(deftest make-feature-vectors
  (are [expected counts] (= expected (feature-vectors counts))
       [[0 1 1]
        [0 1 2]
        [0 1 2]
        [0 1 2]
        [0 2 1]]
       {0 {1 {1 1 2 3}
           2 {1 1}}}

       [[0 1 1]
        [0 1 2]
        [0 1 2]
        [0 2 1]
        [2 1 2]
        [2 1 2]]
       {0 {1 {1 1 2 2}
           2 {1 1}}
        2 {1 {2 2}}}

       [[0 1 1]
        [0 1 2]
        [0 1 2]
        [0 2 1]
        [0 0 1]
        [2 1 2]
        [2 1 2]]
       {0 {1 {1 1 2 2}
           2 {1 1}
           :missing {1 1}}
        2 {1 {2 2}}}))
11 changes: 11 additions & 0 deletions test/infer/matrix_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
(ns infer.matrix-test
(:use clojure.contrib.test-is
incanter.core)
(:require (infer [matrix :as m])))

;; m/inc is applied twice to the same all-zero 3x3 matrix; the second
;; expectation (2, not 3) only holds if the first call left it untouched.
(deftest inc-test
  (let [zeros (matrix 0 3 3)
        corner (fn [v] (matrix [[v 0 0] [0 0 0] [0 0 0]]))]
    (is (= (corner 1)
           (m/inc zeros 0 0)))
    (is (= (corner 2)
           (m/inc zeros 0 0 2)))))

0 comments on commit 79962c3

Please sign in to comment.