Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

dimensionality refactoring

  • Loading branch information...
commit 5a87355030aaad54e0f0e35935b83ec144874a6a 1 parent 68c9d39
@chochkov authored
View
2  Gemfile
@@ -3,6 +3,4 @@ source :rubygems
group :test do
gem 'rspec'
gem 'guard-rspec'
- gem 'guard-spork'
- gem 'spork', '> 0.9.0.rc'
end
View
6 Gemfile.lock
@@ -6,9 +6,6 @@ GEM
thor (~> 0.14.6)
guard-rspec (0.5.4)
guard (>= 0.8.4)
- guard-spork (0.3.1)
- guard (>= 0.8.4)
- spork (>= 0.8.4)
rspec (2.7.0)
rspec-core (~> 2.7.0)
rspec-expectations (~> 2.7.0)
@@ -17,7 +14,6 @@ GEM
rspec-expectations (2.7.0)
diff-lcs (~> 1.1.2)
rspec-mocks (2.7.0)
- spork (0.9.0.rc9)
thor (0.14.6)
PLATFORMS
@@ -25,6 +21,4 @@ PLATFORMS
DEPENDENCIES
guard-rspec
- guard-spork
rspec
- spork (> 0.9.0.rc)
View
17 Guardfile
@@ -1,19 +1,8 @@
# A sample Guardfile
# More info at https://github.com/guard/guard#readme
-guard 'spork', :cucumber_env => { 'RAILS_ENV' => 'test' }, :rspec_env => { 'RAILS_ENV' => 'test' } do
- watch('config/application.rb')
- watch('config/environment.rb')
- watch(%r{^config/environments/.+\.rb$})
- watch(%r{^config/initializers/.+\.rb$})
- watch('Gemfile')
- watch('Gemfile.lock')
- watch('spec/spec_helper.rb')
- watch('test/test_helper.rb')
-end
-
-guard 'rspec', :version => 2 do
- watch('lib/*.rb')
- watch('spec/*.rb')
+guard 'rspec', :version => 2, :cli => '--color' do
+ watch(/^lib\/(.+)\.rb$/) { |m| "spec/#{m[1]}_spec.rb" }
+ watch(%r{^spec/.+_spec\.rb$})
end
View
3  README
@@ -0,0 +1,3 @@
+Ruby implementations of clustering and classification algorithms such as
+kNN, k-means, expectation maximization, Bayesian techniques, etc.
+
View
1  init.rb
@@ -1,6 +1,7 @@
require './lib/vector'
require './lib/vectors'
require './lib/kmeans'
+require './lib/errors'
include Mustererkennung
View
2  lib/errors.rb
@@ -0,0 +1,2 @@
+class DimensionMismatch < ArgumentError; end
+
View
11 lib/kmeans.rb
@@ -14,12 +14,9 @@ def initialize(opts = {})
file.read.split("\n")
end.map(&:split).map { |row| row.map(&:to_i) }
- @data = @data.inject(Vectors.new) do |memo, row|
- row.first(16).each_slice(2) do |x, y|
- memo << Vector.new(x, y, row.last)
- end
- memo
- end
+ @data = Vectors.new(@data.map do |row|
+ Vector.new({ :label => row.last }, *row.first(16))
+ end)
@iteration = 0
@centroids = { iteration => data.sample(k) }
@@ -49,7 +46,7 @@ def cluster!
def centroids
if clusters
@centroids[iteration] ||= clusters.inject([]) do |memo, pair|
- memo.push(pair.last.centroid)
+ memo.push(pair.last.mean)
memo
end
else
View
125 lib/vector.rb
@@ -1,64 +1,119 @@
-# 2 dimensional Vector with label
+# A multidimensional vector.
+#
+require "#{File.dirname(__FILE__)}/vector_class_methods"
+
module Mustererkennung
class Vector
- attr_accessor :elements, :label
+ attr_accessor :label, :elements, :dimension
- def initialize(label = nil, *elements)
- @elements = elements
- @label = label
- end
+ alias :size :dimension
- # Euclidean distance between self and other
- def distance(other)
- Math.sqrt([ elements, other ].inject(0) { |memo, element|
- memo += (element - memo.delete_at(0)) ** 2
- memo
- })
+ extend VectorClassMethods
+
+ def initialize(opts = {}, *args, &block)
+ if opts.kind_of? Hash
+ self.label = opts[:label]
+ arguments = args.flatten
+ else
+ arguments = [ opts, args ].flatten
+ end
+
+ self.elements =
+ if block_given? && Vector.same_dimension?(arguments)
+ self.label = defined?(@label) ? @label : arguments.first.label
+ Vector.elementwise(arguments, &block)
+ else
+ arguments
+ end
+
+ self.dimension = elements.size
end
# elementwise summation or sum of vectors
- def +(other)
- if other.kind_of? Fixnum
- @x += other
- @y += other
- self
- elsif other.kind_of? Vector
- Vector.new(@x + other.x, @y + other.y, label = other)
+ def +(arg)
+ if Vector.vector?(arg)
+ Vector.new({ :label => common_label(arg) }, self, arg) { |x, y| x + y }
+ else
+ Vector.new({ :label => label }, map { |e| e + arg })
+ end
+ end
+
+ # Elementwise difference with a scalar, or the difference of two vectors.
+ #
+ # arg - another Vector, or a value that can be subtracted from every element.
+ #
+ # Returns a new Vector built from the results; self is not modified.
+ def -(arg)
+   if Vector.vector?(arg)
+     # Resolve the label against the other vector, exactly as #+ does.
+     # Passing our own label here would make common_label call #label on it.
+     Vector.new({ :label => common_label(arg) }, self, arg) { |x, y| x - y }
+   else
+     Vector.new({ :label => label }, map { |e| e - arg })
+   end
  end
# elementwise multiplication or outer product
def *(other)
if other.kind_of? Fixnum
- @x *= other
- @y *= other
- self
+ Vector.new({ :label => label }, map { |e| e * other })
elsif other.kind_of? Vector
- x * other.x + y * other.y
+ inject(0, other) do |memo, x, y|
+ memo += x * y
+ end
end
end
# elementwise division
def /(denominator)
- @x /= denominator.to_f
- @y /= denominator.to_f
- self
+ Vector.new({ :label => label }, map { |e| e / denominator })
end
- def label=(other)
- if label == other.label
- label
- elsif label.nil? && ! other.label.nil?
- other.label
- elsif ! label.nil? && other.label.nil?
+ def distance(other)
+ Math.sqrt(inject(0, other) { |memo, x, y|
+ memo += (x - y) ** 2
+ })
+ end
+
+ def common_label(other)
+ if other.label == label
label
else
- nil
+ labels = [ label, other.label ].compact
+ if labels.size < 2
+ labels.first
+ else
+ nil
+ end
+ end
+ end
+
+ def same_dimension?(*vectors)
+ Vector.same_dimension? self, *vectors
+ end
+
+ def [](i)
+ elements[i]
+ end
+
+ def ==(*others)
+ others.flatten.all? do |vector|
+ vector.elements == elements
end
end
- def ==(other)
- x == other.x && y == other.y
+ def push(*elements)
+ self.elements.push(*elements)
+ self.dimension += elements.size
+ self
+ end
+
+ def all?(&block); elements.all? &block; end
+
+ def any?(&block); elements.any? &block; end
+
+ def map(&block); elements.map &block; end
+
+ def inject(start, *vectors)
+ raise DimensionMismatch unless same_dimension?(vectors)
+
+ (0..dimension - 1).inject(start) do |memo, i|
+ yield(memo, *[self[i], *vectors.map { |vector| vector[i] }])
+ end
end
# Attribute vector to the nearest centroid.
View
28 lib/vector_class_methods.rb
@@ -0,0 +1,28 @@
+module Mustererkennung
+ module VectorClassMethods
+ def vectors_of_same_dimension?(*args)
+ vector?(args) && same_dimension?(args)
+ end
+
+ def vector?(*args)
+ args.flatten.any? && args.flatten.all? do |arg|
+ arg.kind_of? Vector
+ end
+ end
+
+ def same_dimension?(*vectors)
+ dimension = vectors.flatten.first.dimension
+ vectors.flatten.all? do |vector|
+ dimension == vector.dimension
+ end && dimension
+ end
+
+ def elementwise(*vectors)
+ dimension = vectors.flatten.first.dimension
+ @elements = (0..dimension - 1).inject([]) do |memo, i|
+ memo.push yield(*vectors.flatten.map { |vector| vector[i] } )
+ end
+ end
+ end
+end
+
View
64 lib/vectors.rb
@@ -1,39 +1,41 @@
-# A collection of 2 dimensional vectors
+# A collection of vectors with the same dimensions.
+#
class Vectors
include Enumerable
- attr_accessor :data
+ attr_accessor :vectors, :dimension
- def initialize(array = [], label = nil)
- @data = if array.empty? || array.all? { |e| e.kind_of? Vector }
- array
- elsif array.all? { |e| e.size == 2 }
- array.map { |pair| Vector.new *pair, label }
- elsif array.all? { |e| e.size == 3 }
- array.map { |triple| Vector.new *triple }
- else
- raise ArgumentError.new "#{array} given, expected vectors or triplets"
- end
+ def initialize(*args)
+ if args.flatten.any?
+ raise DimensionMismatch unless Vector.vectors_of_same_dimension?(args)
+ @dimension = args.flatten.first.dimension
+ end
+ @dimension ||= 16
+ @vectors = args.flatten
end
- def first
- @data.first
+ def first(arg = nil)
+ if arg
+ @vectors.first(arg)
+ else
+ @vectors.first
+ end
end
def last
- @data.last
+ @vectors.last
end
- def [](count)
- @data[count]
+ def [](index)
+ @vectors[index]
end
def each
- @data.each
+ @vectors.each
end
def <<(other)
if other.kind_of?(Vector)
- @data << other
+ @vectors << other
else
raise ArgumentError.new("Vector expected #{other} received.")
end
@@ -42,32 +44,42 @@ def <<(other)
# Returns new Vectors object containing the vectors
# from self and other
def +(other)
- Vectors.new(@data + other.data)
+ Vectors.new(@vectors + other.vectors)
end
def sample(*args)
- @data.sample(*args)
+ @vectors.sample(*args)
end
def size
- @data.size
+ @vectors.size
end
def sum
- @data.inject(Vector.new(0, 0)) do |memo, vector|
+ a = Array.new(dimension, 0)
+ inject(Vector.new(a)) do |memo, vector|
memo += vector
memo
end
end
def mean
- sum / @data.size
+ sum / @vectors.size.to_f
end
- alias :centroid :mean
+ def label
+ @vectors.inject(Hash.new(0)) do |memo, vector|
+ memo[vector.label] += 1
+ memo
+ end.max_by { |label, count| count }.first
+ end
def sort_by! &block
- @data.sort_by! &block
+ @vectors.sort_by! &block
+ end
+
+ def inject(start, &block)
+ @vectors.inject(start, &block)
end
end
View
45 spec/spec_helper.rb
@@ -1,45 +1,8 @@
require 'rubygems'
-require 'spork'
+require File.join(File.dirname(__FILE__), '..', 'init.rb')
-Spork.prefork do
- # Loading more in this block will cause your tests to run faster. However,
- # if you change any configuration or code from libraries loaded here, you'll
- # need to restart spork for it take effect.
-
- require File.join(File.dirname(__FILE__), '..', 'init.rb')
+Rspec.configure do |c|
+ c.filter_run :focus => true
+ c.run_all_when_everything_filtered = true
end
-Spork.each_run do
- # This code will be run each time you run your specs.
-
-end
-
-# --- Instructions ---
-# Sort the contents of this file into a Spork.prefork and a Spork.each_run
-# block.
-#
-# The Spork.prefork block is run only once when the spork server is started.
-# You typically want to place most of your (slow) initializer code in here, in
-# particular, require'ing any 3rd-party gems that you don't normally modify
-# during development.
-#
-# The Spork.each_run block is run each time you run your specs. In case you
-# need to load files that tend to change during development, require them here.
-# With Rails, your application modules are loaded automatically, so sometimes
-# this block can remain empty.
-#
-# Note: You can modify files loaded *from* the Spork.each_run block without
-# restarting the spork server. However, this file itself will not be reloaded,
-# so if you change any of the code inside the each_run block, you still need to
-# restart the server. In general, if you have non-trivial code in this file,
-# it's advisable to move it into a separate file so you can easily edit it
-# without restarting spork. (For example, with RSpec, you could move
-# non-trivial code into a file spec/support/my_helper.rb, making sure that the
-# spec/support/* files are require'd from inside the each_run block.)
-#
-# Any code that is left outside the two blocks will be run during preforking
-# *and* during each_run -- that's probably not what you want.
-#
-# These instructions should self-destruct in 10 seconds. If they don't, feel
-# free to delete them.
-
View
31 spec/vector_class_methods_spec.rb
@@ -0,0 +1,31 @@
+describe "Vector class methods" do
+ describe "Vector.same_dimension_vectors?" do
+ it "should be true for same dimensions vectors" do
+ a = Vector.new 0, 1, 2
+ b = Vector.new 9, 2, 4
+ Vector.same_dimension?(a, b).should == 3
+ Vector.same_dimension?([a, b]).should be_true
+
+ c = Vector.new({:label => 0}, 3, 4, 6)
+ Vector.same_dimension?(a, c).should == 3
+ end
+
+ it "" do
+ Vector.same_dimension?(Vector.new 0, 1).should == 2
+ end
+
+ it "should be false for different dimensions" do
+ a = Vector.new({:label => 0}, 0, 1, 2)
+ b = Vector.new 9, 2, 4, 6
+ Vector.same_dimension?(a, b).should be_false
+ end
+ end
+
+ describe "Vector.vector?" do
+ it "should be true for vectors and false otherwise" do
+ Vector.vector?(Vector.new).should be_true
+ Vector.vector?.should be_false
+ Vector.vector?([ 2, 3 ]).should be_false
+ end
+ end
+end
View
78 spec/vector_spec.rb
@@ -1,15 +1,45 @@
require 'spec_helper'
describe Vector do
- it "can be initialized" do
- a = Vector.new(8, 1, 2)
+ it "is initialized through a label and a list of arguments" do
+ a = Vector.new({ :label => 8 }, 1, 2)
a.label.should == 8
a.elements.first.should == 1
a.elements.last.should == 2
+
+ a = Vector.new(1, 2, 3)
+ a.dimension.should == 3
+
a = Vector.new
- a.elements.should == nil
+ a.elements.should == []
a.label.should == nil
- Vector.new(nil, *[ 1, 2 ]).should == Vector.new(nil, 1, 2)
+ end
+
+ it "is initialized through a list of vectors and a block" do
+ a = Vector.new({ :label => 0 }, 1, 2, 3)
+ b = Vector.new({ :label => 0 }, 4, 6, 8)
+ c = Vector.new a, b do |x, y|
+ x + y
+ end
+ c.dimension.should == 3
+ c.elements[0].should == 5
+ c.elements[1].should == 8
+ c.elements[2].should == 11
+ c.label.should == 0
+ end
+
+ # An explicit :label option must win over the labels of the source vectors
+ # when a vector is built from several vectors and a combining block.
+ # (No comma before `do`: a trailing comma there is a syntax error.)
+ it "should accept the given label when constructed from elementwise operation on vectors" do
+   a = Vector.new(1, 2)
+   b = Vector.new(4, 6)
+   c = Vector.new(3, 5)
+   d = Vector.new({:label => 0}, a, b, c) do |x, y, z|
+     x + y + z
+   end
+
+   d.label.should == 0
+   d.dimension.should == 2
+   d.elements[0].should == 8
+   d.elements[1].should == 13
  end
it "should calculate Eucledian distance" do
@@ -22,17 +52,41 @@
(Vector.new(2, 3) * 3).should == Vector.new(6, 9)
end
- it "should sum vectors and preserve labels" do
- a = Vector.new(1, 2, 9) + Vector.new(3, 4, 9)
- a.should == Vector.new(4, 6, 9)
- a = Vector.new(1, 2) + Vector.new(3, 4, 9)
- a.label.should == 9
- a = Vector.new(1, 2, 9) + Vector.new(3, 4)
- a.label.should == 9
+ it "should sum vectors and preserve common or not nil labels" do
+ a = Vector.new(1, 2, 9) + Vector.new({ :label => 0 }, 3, 4, 9)
+ a.label.should == 0
+ a.elements.should == [ 4, 6, 18 ]
+
+ a = Vector.new({ :label => 0 }, 0, 2) + Vector.new({ :label => 1 }, 0, 2)
+ a.label.should == nil
+ a.elements.should == [ 0, 4 ]
+
+ a = Vector.new({ :label => 0 }, 0, 2) + Vector.new(0, 2)
+ a.label.should == 0
+ a.elements.should == [ 0, 4 ]
+
+ a = Vector.new(0, 2) + Vector.new(0, 2)
+ a.label.should == nil
+ a.elements.should == [ 0, 4 ]
+ end
+
+ it "should sum with a nil label if both labels are different", :focus => false do
+ a = Vector.new({ :label => 0 }, 1)
+ b = Vector.new({ :label => 1 }, 2)
+ c = a + b
+ c.label.should == nil
end
it "should divide by number" do
- (Vector.new(4, 6) / 2).should == Vector.new(2, 3)
+ a = Vector.new(4, 6)
+ b = a / 2
+ b.kind_of?(Vector).should be_true
+ b.elements.should == [ 2, 3 ]
+
+ a = Vector.new({ :label => 0 })
+ b = a / 2
+ b.elements.should == []
+ b.label.should == 0
end
it "should cluster properly" do
View
50 spec/vectors_spec.rb
@@ -1,34 +1,29 @@
require 'spec_helper'
describe Vectors do
- it "should be initialized from array of triples each meaning x, y and label" do
- a = Vectors.new([ [1, 2, 8], [3, 4, 0] ])
- a.first.should == Vector.new(1, 2, 8)
- a.last.should == Vector.new(3, 4, 0)
- end
-
- it "should be initiialized from array of pairs and a label" do
- a = Vectors.new([ [1, 2], [2, 3] ], 8)
- a.first.should == Vector.new(1, 2, 8)
- a.last.should == Vector.new(2, 3, 8)
- end
-
- it "should be initialized from a collection of vectors" do
- a = Vector.new 1, 2
- b = Vector.new 3, 5
- vectors = Vectors.new([a, b])
- vectors.first.should == a
- vectors.last.should == b
+ it "should be initialized with a label and a list of elements" do
+ a = Vectors.new(Vector.new(2, 3), Vector.new(3, 4))
+ a.first.should == Vector.new(2, 3)
+ a.last.should == Vector.new(3, 4)
end
it "could be empty" do
Vectors.new.size == 0
end
+ it "cant be initialized with vectors of different dimensions" do
+ lambda {
+ Vectors.new(Vector.new(1), Vector.new(-2, 3))
+ }.should raise_error(DimensionMismatch)
+ end
+
it "should correctly give the mean of a collection" do
- a = Vectors.new([ [2, 3, nil], [4, 5, nil] ])
- a.mean.should == Vector.new(3, 4)
- a.mean.should == a.centroid
+ a = Vector.new 1, 2
+ b = Vector.new 3, 5
+ vectors = Vectors.new([a, b])
+ m = vectors.mean
+ m.kind_of?(Vector).should be_true
+ m.elements.should == [ 2, 3.5 ]
end
it "should accept new vectors" do
@@ -37,5 +32,18 @@
a << Vector.new(2, 3)
a.size.should == 1
end
+
+ it "should give the right label" do
+ a = Vector.new({ :label => 0 }, 1, 2, 3)
+ b = Vector.new({ :label => 0 }, 3, 5, 6)
+ c = Vector.new({ :label => 1 }, 4, 4, 4)
+ vectors = Vectors.new(a, b, c)
+ vectors.label.should == 0
+
+ d = Vector.new({ :label => 1 }, 2, 3, 5)
+ e = Vector.new({ :label => 1 }, 2, 3, 5)
+ vectors = Vectors.new(a, b, c, d, e)
+ vectors.label.should == 1
+ end
end
Please sign in to comment.
Something went wrong with that request. Please try again.