Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

work with sparse data

  • Loading branch information...
commit 2681aebe5038efea0caf35b013f541b035ef93be 1 parent 2f2325f
authored February 18, 2011
28  README.rdoc
Source Rendered
@@ -8,6 +8,8 @@ Provides bindings to K-Means clustering in Cluster 3.0
8 8
 
9 9
 == Synopsis
10 10
 
  11
+=== Numeric Data
  12
+
11 13
   require 'flock'
12 14
   
13 15
   data     = Array.new(13) {[]}
@@ -67,6 +69,32 @@ Provides bindings to K-Means clustering in Cluster 3.0
67 69
     weights:   Array.new(13) {1.0},
68 70
   )
69 71
 
  72
+
  73
+=== Sparse and Non-Numeric Data
  74
+
  75
+  require 'pp'
  76
+  require 'flock'
  77
+  
  78
+  data = []
  79
+  data << { apple:  1, orange: 1 }
  80
+  data << { black:  1, white:  1 }
  81
+  data << { white:  1, cyan:   1 }
  82
+  data << { orange: 1 }
  83
+  data << { apple:  1 }
  84
+  
  85
+  pp Flock.sparse_kmeans(2, data)
  86
+
  87
+  # or, even more simply, pass arrays of keys (each listed key's value defaults to 1)
  88
+  
  89
+  data = []
  90
+  data << %w(apple orange)
  91
+  data << %w(black white)
  92
+  data << %w(white cyan)
  93
+  data << %w(orange)
  94
+  data << %w(apple)
  95
+  
  96
+  pp Flock.sparse_kmeans(2, data)
  97
+  
70 98
 == TODO
71 99
 
72 100
 Bindings to,
2  VERSION
... ...
@@ -1 +1 @@
1  
-0.1.0
  1
+0.2.0
0  examples/example.rb 100644 → 100755
File mode changed
22  examples/sparse.rb
... ...
@@ -0,0 +1,22 @@
  1
#!/usr/bin/env ruby
# Demonstrates Flock.sparse_kmeans with both sparse input shapes it accepts.
# `env` is used in the shebang so the script runs with whichever ruby is on
# PATH instead of assuming an interpreter at /usr/bin/ruby.

require 'pp'
require 'flock'

# Hash observations: each key names a dimension, each value is its magnitude.
data = [
  { apple:  1, orange: 1 },
  { black:  1, white:  1 },
  { white:  1, cyan:   1 },
  { orange: 1 },
  { apple:  1 },
]

pp Flock.sparse_kmeans(2, data)

# Array observations: every listed key is given the value 1.
data = [
  %w(apple orange),
  %w(black white),
  %w(white cyan),
  %w(orange),
  %w(apple),
]

pp Flock.sparse_kmeans(2, data)
2  ext/flock.c
@@ -35,7 +35,7 @@ VALUE rb_kmeans(int argc, VALUE *argv, VALUE self) {
35 35
         ccentroid_mask[i] = (int   *)malloc(sizeof(int   )*ncols);
36 36
         for (j = 0; j < ncols; j++) {
37 37
             cdata[i][j] = NUM2DBL(rb_ary_entry(rb_ary_entry(data, i), j));
38  
-            cmask[i][j] = NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
  38
+            cmask[i][j] = NIL_P(mask) ? 1 : NUM2INT(rb_ary_entry(rb_ary_entry(mask, i), j));
39 39
         }
40 40
     }
41 41
 
50  flock.gemspec
... ...
@@ -0,0 +1,50 @@
  1
+# Generated by jeweler
  2
+# DO NOT EDIT THIS FILE DIRECTLY
  3
+# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
  4
+# -*- encoding: utf-8 -*-
  5
+
  6
+Gem::Specification.new do |s|
  7
+  s.name = %q{flock}
  8
+  s.version = "0.2.0"
  9
+
  10
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  11
+  s.authors = ["Bharanee Rathna"]
  12
+  s.date = %q{2011-02-18}
  13
+  s.description = %q{A thin ruby binding to Cluster 3.0}
  14
+  s.email = ["deepfryed@gmail.com"]
  15
+  s.extensions = ["ext/extconf.rb"]
  16
+  s.extra_rdoc_files = [
  17
+    "README.rdoc"
  18
+  ]
  19
+  s.files = [
  20
+    "README.rdoc",
  21
+     "Rakefile",
  22
+     "VERSION",
  23
+     "ext/cluster.c",
  24
+     "ext/cluster.h",
  25
+     "ext/extconf.rb",
  26
+     "ext/flock.c",
  27
+     "flock.gemspec",
  28
+     "lib/flock.rb"
  29
+  ]
  30
+  s.homepage = %q{http://github.com/deepfryed/flock}
  31
+  s.rdoc_options = ["--charset=UTF-8"]
  32
+  s.require_paths = ["lib"]
  33
+  s.rubygems_version = %q{1.3.7}
  34
+  s.summary = %q{Ruby bindings to Cluster 3.0.}
  35
+  s.test_files = [
  36
+    "examples/sparse.rb",
  37
+     "examples/example.rb"
  38
+  ]
  39
+
  40
+  if s.respond_to? :specification_version then
  41
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
  42
+    s.specification_version = 3
  43
+
  44
+    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
  45
+    else
  46
+    end
  47
+  else
  48
+  end
  49
+end
  50
+
35  lib/flock.rb
... ...
@@ -0,0 +1,35 @@
  1
+require_relative '../ext/flock'
  2
# Helpers for running k-means over sparse or non-numeric observations.
#
# An observation is either a Hash of dimension key => value or an Array of
# dimension keys. Observations are expanded into dense, equally sized vectors
# and then handed to +kmeans+ (defined outside this file).
module Flock

  # Expands sparse Hash observations into dense vectors.
  #
  # sparse_data - Array of Hashes mapping a dimension key to its value.
  #
  # Returns a two-element Array [dims, data]. +dims+ maps every distinct key
  # to its column index, in order of first appearance. +data+ holds one Array
  # per observation, with 0 in each column the observation does not mention.
  def self.sparse_hash_to_data sparse_data
    # flat_map (instead of map + flatten) keeps Array-valued keys intact.
    dims = index_dimensions(sparse_data.flat_map(&:keys))
    data = sparse_data.map do |sv|
      vector = Array.new(dims.size, 0)
      sv.each { |key, value| vector[dims[key]] = value }
      vector
    end
    [dims, data]
  end

  # Expands sparse Array observations into dense vectors.
  #
  # sparse_data - Array of Arrays, each listing the dimension keys present in
  #               one observation.
  #
  # Returns [dims, data] in the same shape as sparse_hash_to_data; every
  # listed key is stored as 1 and every absent key as 0.
  def self.sparse_array_to_data sparse_data
    # Flatten one level only so that a key which is itself an Array survives.
    dims = index_dimensions(sparse_data.flatten(1))
    data = sparse_data.map do |sv|
      vector = Array.new(dims.size, 0)
      sv.each { |key| vector[dims[key]] = 1 }
      vector
    end
    [dims, data]
  end

  # Runs k-means on sparse observations.
  #
  # size        - first argument forwarded to +kmeans+ (the README passes the
  #               desired number of clusters here).
  # sparse_data - Array of Hashes or Array of Arrays; the first element
  #               decides which conversion is applied to all of them.
  # options     - Hash forwarded to +kmeans+. When it has a :weights entry,
  #               that entry is a Hash of dimension key => weight and is
  #               converted to a dense Array aligned with the data columns
  #               (unlisted dimensions weigh 1). Weights for keys that never
  #               occur in the data are ignored. The caller's Hash is left
  #               untouched.
  #
  # Returns whatever +kmeans+ returns.
  def self.sparse_kmeans size, sparse_data, options={}
    dims, data = sparse_data.first.is_a?(Array) ? sparse_array_to_data(sparse_data) : sparse_hash_to_data(sparse_data)

    if options.key?(:weights)
      weights = Array.new(dims.size, 1)
      options[:weights].each do |key, value|
        column = dims[key]
        # A key with no column cannot influence the result; skipping it avoids
        # indexing the weight vector with nil.
        weights[column] = value if column
      end
      # merge builds a new Hash, so the caller's options are not modified.
      options = options.merge(weights: weights)
    end

    # The third argument is passed as nil; presumably this is the mask, which
    # the C extension appears to treat as "all present" when nil -- confirm
    # against ext/flock.c.
    kmeans(size, data, nil, options)
  end

  # Maps each distinct key to its zero-based position of first appearance.
  def self.index_dimensions keys
    Hash[keys.uniq.each_with_index.to_a]
  end
  private_class_method :index_dimensions
end

0 notes on commit 2681aeb

Please sign in to comment.
Something went wrong with that request. Please try again.