Skip to content

Commit

Permalink
Let the models write themselves to a file
Browse files Browse the repository at this point in the history
  • Loading branch information
camilo committed Sep 23, 2011
1 parent 803bf81 commit b728906
Show file tree
Hide file tree
Showing 4 changed files with 80 additions and 8 deletions.
13 changes: 13 additions & 0 deletions ext/svmredlight.c
Expand Up @@ -637,6 +637,18 @@ model_support_vectors_count(VALUE self){
return INT2FIX(m->sv_num);
}

/*
 * Model#to_file(path) -> nil
 *
 * Serializes the wrapped MODEL to the file at +path+ by delegating to
 * write_model().
 *
 * path must be a Ruby String (TypeError otherwise) and must not contain
 * embedded NUL bytes (ArgumentError otherwise).
 *
 * NOTE(review): the Ruby wrapper states that SVMLight's file helpers exit the
 * process on I/O failure, so callers are expected to validate the path before
 * calling this method.
 */
static VALUE
model_write_to_file(VALUE self, VALUE pahtofile){
  MODEL *m;
  char *path;

  Check_Type(pahtofile, T_STRING);

  /* StringValueCStr (unlike StringValuePtr) guarantees a NUL-terminated buffer
   * and rejects strings with embedded NUL bytes, which would otherwise be
   * silently truncated when handed to a C path API. */
  path = StringValueCStr(pahtofile);

  Data_Get_Struct(self, MODEL, m);

  write_model(path, m);

  return Qnil;
}

static VALUE
model_total_words(VALUE self){
MODEL *m;
Expand Down Expand Up @@ -750,6 +762,7 @@ Init_svmredlight(){
rb_cModel = rb_define_class_under(rb_mSvmLight, "Model", rb_cObject);
rb_define_singleton_method(rb_cModel, "from_file", model_read_from_file, 1);
rb_define_singleton_method(rb_cModel, "learn_classification", model_learn_classification, 5);
rb_define_method(rb_cModel, "to_file", model_write_to_file, 1);
rb_define_method(rb_cModel, "support_vectors_count", model_support_vectors_count, 0);
rb_define_method(rb_cModel, "total_words", model_total_words, 0);
rb_define_method(rb_cModel, "classify", model_classify_example, 1);
Expand Down
39 changes: 31 additions & 8 deletions lib/svmredlight/model.rb
@@ -1,6 +1,6 @@
module SVMLight

# Raised when a model file that should be read does not exist or is not a regular file.
class MissingModelFile < StandardError; end

# Raised when a model cannot be written to the requested path.
class ModelWriteError < StandardError; end

# A model is the product of training an SVM. Once created, it can take documents as inputs
# and act on them (for instance, by classifying them). Models can also be read from files
Expand All @@ -19,26 +19,49 @@ def self.new(type, documents_and_lables, learn_params, kernel_params, alphas = n

learn_classification(documents_and_lables, learn_params, kernel_params, false, alphas)
end

# Hide the raw singleton methods provided by the C extension; they are reached through
# the Ruby-level entry points in this class (self.new and self.read_from_file) instead.
private_class_method :learn_classification
private_class_method :from_file


# About the existence checks in self.read_from_file and #write_to_file:
#
# This is an anti-pattern. Checking for the existence of a resource is normally something to be avoided; trying to
# open the resource and then rescuing the exception/reading the error code is a much better practice. However,
# SVMLight will call exit(1) if the file does not exist, and that in turn will kill the Ruby VM. So, to minimize
# that possibility, these methods optimistically check for the file's existence and hope it is still there when it
# is actually time to open it.
#
# TODO: Come up with a proper replacement for those methods, probably by simply reimplementing them in
# svmredlight.c and raising an exception when files cannot be opened.

# Loads an existing model from a file.
#
# @param [String] pahtofile path to the model file
# @return whatever the native from_file loader returns for that path
# @raise [MissingModelFile] if pahtofile does not exist or is not a regular file
def self.read_from_file(pahtofile)
  # Checking up front is normally an anti-pattern, but SVMLight calls exit(1) on a missing
  # file, which would take the whole Ruby VM down with it. File.file? is already false for
  # a path that does not exist, so no separate existence check is needed (File.exists? was
  # removed in Ruby 3.2).
  unless File.file?(pahtofile)
    raise MissingModelFile, "the #{pahtofile} does not exists or is not a file"
  end

  from_file(pahtofile)
end

# Hide the raw writer provided by the C extension; callers go through #write_to_file,
# which checks the destination before delegating to it.
private :to_file

# Writes this model to a file.
#
# The destination is validated before the native writer is invoked, because a failure
# inside SVMLight would terminate the Ruby VM instead of raising an exception.
#
# @param [String] pahtofile path of the file to write
# @return [nil]
# @raise [ModelWriteError] if the parent directory is missing or not writable, or if the
#   path already exists but is not a writable regular file
def write_to_file(pahtofile)
  dir = File.dirname(pahtofile)
  dir_ok = File.directory?(dir) && File.writable?(dir)

  # A writable parent directory is not enough: an existing directory or read-only file at
  # the target path would still make the native writer fail.
  target_ok = !File.exist?(pahtofile) || (File.file?(pahtofile) && File.writable?(pahtofile))

  raise ModelWriteError, "impossible to write #{pahtofile}" unless dir_ok && target_ok

  to_file(pahtofile)
end



end
end
1 change: 1 addition & 0 deletions test/helper.rb
Expand Up @@ -8,6 +8,7 @@
$stderr.puts "Run `bundle install` to install missing gems"
exit e.status_code
end

require 'test/unit'
require 'shoulda'
require './lib/svmredlight'
Expand Down
35 changes: 35 additions & 0 deletions test/test_model.rb
Expand Up @@ -28,6 +28,41 @@ class TestModel < Test::Unit::TestCase
end
end

# Covers Model#write_to_file: a model trained in memory is written to disk and read back.
context "writting a model to a file" do
  setup do
    # Five small documents, each given as [feature_id, weight] pairs.
    @features ||= [
      [ [1,0.6], [11, 0.0], [34, 0.1] ],
      [ [5,0.4], [15, 0.0], [30, 0.1] ],
      [ [1,0.1], [13, 0.0], [31, 0.1] ],
      [ [7,0.7], [15, 0.0], [35, 0.1] ],
      [ [5,0.6], [19, 0.0], [44, 0.1] ],
    ]

    # Pair every document with a label that alternates between 0 and -1.
    @docs_and_labels ||= @features.each_with_index.map do |feature, index|
      [ Document.create(index + 1, 1, 0, 0, feature), index%2 * -1]
    end

    @filepath = './test/assets/written_model'
    @model = Model.new(:classification, @docs_and_labels, {}, {}, nil)
  end

  should "write a model from memmory to a file" do
    @model.write_to_file(@filepath)

    # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
    assert File.exist?(@filepath)
    assert File.file?(@filepath)
    # TODO: Implement actual model equality
    assert_equal @model.support_vectors_count, Model.read_from_file(@filepath).support_vectors_count
  end

  # Need to find a good way to test this without relying too much on the environment
  should "raise ModelWriteError when it is impossible to write a model file"

  teardown do
    # Remove the artifact from Ruby instead of shelling out to `rm ... &>`, which is a
    # bash-only redirection and behaves differently under a POSIX sh.
    File.delete(@filepath) if File.file?(@filepath)
  end
end

context "when learning from new documents" do

setup do
Expand Down

0 comments on commit b728906

Please sign in to comment.