Permalink
Browse files

Let the models write themselves to a file

  • Loading branch information...
1 parent 803bf81 commit b728906888ba472dee0f25df936b81a6cd142139 @camilo committed Sep 23, 2011
Showing with 80 additions and 8 deletions.
  1. +13 −0 ext/svmredlight.c
  2. +31 −8 lib/svmredlight/model.rb
  3. +1 −0 test/helper.rb
  4. +35 −0 test/test_model.rb
View
@@ -638,6 +638,18 @@ model_support_vectors_count(VALUE self){
}
static VALUE
+model_write_to_file(VALUE self, VALUE pahtofile){
+ Check_Type(pahtofile, T_STRING);
+
+ MODEL *m;
+ Data_Get_Struct(self, MODEL, m);
+
+ write_model(StringValuePtr(pahtofile), m);
+
+ return Qnil;
+}
+
+static VALUE
model_total_words(VALUE self){
MODEL *m;
Data_Get_Struct(self, MODEL, m);
@@ -750,6 +762,7 @@ Init_svmredlight(){
rb_cModel = rb_define_class_under(rb_mSvmLight, "Model", rb_cObject);
rb_define_singleton_method(rb_cModel, "from_file", model_read_from_file, 1);
rb_define_singleton_method(rb_cModel, "learn_classification", model_learn_classification, 5);
+ rb_define_method(rb_cModel, "to_file", model_write_to_file, 1);
rb_define_method(rb_cModel, "support_vectors_count", model_support_vectors_count, 0);
rb_define_method(rb_cModel, "total_words", model_total_words, 0);
rb_define_method(rb_cModel, "classify", model_classify_example, 1);
@@ -1,6 +1,6 @@
module SVMLight
- class MissingModelFile < StandardError;end
+ class MissingModelFile < StandardError; end
+ # Raised by Model#write_to_file when the target path cannot be written.
+ class ModelWriteError < StandardError; end
# A model is the product of training an SVM; once created, it can take documents as inputs
# and act on them (for instance, by classifying them). Models can also be read from files
@@ -19,26 +19,49 @@ def self.new(type, documents_and_lables, learn_params, kernel_params, alphas = n
learn_classification(documents_and_lables, learn_params, kernel_params, false, alphas)
end
+
private_class_method :learn_classification
private_class_method :from_file
-
+
+ # in self.read_from_file and #write_to_file
+ #
+ # This is an anti-pattern. Checking for the existence of a resource is normally something to be avoided; trying to
+ # open the resource and then rescuing the exception/reading the error code is a much better practice. However,
+ # SVMLight will call exit(1) if the file does not exist, and that in turn will kill the Ruby VM, so in this case,
+ # to minimize that possibility, I optimistically check for the file's existence and hope it is still there when it
+ # is actually time to open it.
+ #
+ # TODO: Come up with a proper replacement for those methods, probably by simply reimplementing them in
+ # svmredlight.c and raising an exception when files cannot be opened.
# Loads an existing model from a file
# @param [String] pahtofile path to the model file
def self.read_from_file(pahtofile)
-
- # This is an antipattern, checking for existence of resoruces is normally somethig to be avoided, trying to open
- # and rescuing the exception/reading the error code is a much better practice, however SVMLight will call exit(1)
- # if the file does not exists and that in turn will kill the ruby VM, so in this case to minimize that possibility
- # I'm optimistically check for the file existence and hope it is still there when it is actually open, in the end
- # reimplementing the whole thing in C would be better.
if File.exists?(pahtofile) && File.file?(pahtofile)
from_file(pahtofile)
+
else
+
raise MissingModelFile, "the #{pahtofile} does not exists or is not a file"
end
end
+ private :to_file
+
+ def write_to_file(pahtofile)
+ dir = File.dirname(pahtofile)
+
+ if File.directory?(dir) && File.writable?(dir)
+ to_file(pahtofile)
+
+ else
+ raise ModelWriteError, "impossible to write #{pahtofile}"
+
+ end
+ end
+
+
+
end
end
View
@@ -8,6 +8,7 @@
$stderr.puts "Run `bundle install` to install missing gems"
exit e.status_code
end
+
require 'test/unit'
require 'shoulda'
require './lib/svmredlight'
View
@@ -28,6 +28,41 @@ class TestModel < Test::Unit::TestCase
end
end
+ context "writting a model to a file" do
+ setup do
+ @features ||= [
+ [ [1,0.6], [11, 0.0], [34, 0.1] ],
+ [ [5,0.4], [15, 0.0], [30, 0.1] ],
+ [ [1,0.1], [13, 0.0], [31, 0.1] ],
+ [ [7,0.7], [15, 0.0], [35, 0.1] ],
+ [ [5,0.6], [19, 0.0], [44, 0.1] ],
+ ]
+
+ @docs_and_labels ||= @features.each_with_index.map do |feature, index|
+ [ Document.create(index + 1, 1, 0, 0, feature), index%2 * -1]
+ end
+
+ @filepath = './test/assets/written_model'
+ @model = Model.new(:classification, @docs_and_labels, {}, {}, nil)
+ end
+
+ should "write a model from memmory to a file" do
+ @model.write_to_file(@filepath)
+
+ assert File.exists?(@filepath)
+ assert File.file?(@filepath)
+ # TODO: Implement actual model equality
+ assert_equal @model.support_vectors_count, Model.read_from_file(@filepath).support_vectors_count
+ end
+
+ # Need to find a good way to test this without relaying too much in the environment
+ should "raise ModelWriteError when it is impossible to write a model file"
+
+ teardown do
+ `rm #{@filepath} &> /dev/null`
+ end
+ end
+
context "when learning from new documents" do
setup do

0 comments on commit b728906

Please sign in to comment.