Skip to content

Commit

Permalink
shorten model name
Browse files (browse the repository at this point in the history)
  • Loading branch information
chochkov committed Feb 16, 2012
1 parent 2fdf3bc commit 4de908b
Show file tree
Hide file tree
Showing 13 changed files with 80 additions and 80 deletions.
4 changes: 2 additions & 2 deletions benchmark/benchmark.rb
Expand Up @@ -12,7 +12,7 @@
@training_times = []
@classification_times = []

records_count_at_start = GreenMidgetRecords.count
records_count_at_start = Records.count

def generate_text(message_length = 1)
message ||= []
Expand All @@ -36,7 +36,7 @@ def generate_text(message_length = 1)

puts " ------------------------------- "
puts " Average seconds from #{TRAININGS} trainings and #{CLASSIFICATIONS} classifications. #{MESSAGE_LENGTH} words per message:"
puts " Number of records at start: #{records_count_at_start} and at the end: #{GreenMidgetRecords.count}"
puts " Number of records at start: #{records_count_at_start} and at the end: #{Records.count}"
puts " ------------------------------- "
puts " Training times: #{(@training_times.sum.to_f/TRAININGS).round(4)}"
puts " ------------------------------- "
Expand Down
2 changes: 1 addition & 1 deletion benchmark/test.rb
Expand Up @@ -11,7 +11,7 @@

ActiveRecord::Base.establish_connection(:adapter => 'mysql', :username => 'root', :password => 'root', :database => 'soundcloud_development_temp')

@ham = [ 'messages', 'comments', 'posts' ].map { |table| GreenMidgetRecords.find_by_sql("select body from #{table} limit 1500").to_a.inject([]) { |memo, hash| memo << hash["body"] } }
@ham = [ 'messages', 'comments', 'posts' ].map { |table| Records.find_by_sql("select body from #{table} limit 1500").to_a.inject([]) { |memo, hash| memo << hash["body"] } }

ActiveRecord::Base.establish_connection(:adapter => 'mysql', :username => 'root', :password => 'root', :database => 'classifier_development_weird')
#
Expand Down
2 changes: 1 addition & 1 deletion lib/green_midget.rb
Expand Up @@ -7,7 +7,7 @@
require 'green_midget/models/countable'
require 'green_midget/models/examples'
require 'green_midget/models/features'
require 'green_midget/models/green_midget_records'
require 'green_midget/models/records'
require 'green_midget/models/words'
require 'green_midget/extensions/classifier'

Expand Down
4 changes: 2 additions & 2 deletions lib/green_midget/base.rb
Expand Up @@ -11,7 +11,7 @@ def classify
end
end

GreenMidgetRecords.fetch_all(words)
Records.fetch_all(words)
register_classification

factor = log_ratio
Expand All @@ -30,7 +30,7 @@ def classify_as!(category)
object.record_key(category)
end

GreenMidgetRecords.increment(keys)
Records.increment(keys)
register_training
end

Expand Down
2 changes: 1 addition & 1 deletion lib/green_midget/models/countable.rb
Expand Up @@ -18,7 +18,7 @@ def initialize(key)
end

def [](category)
GreenMidgetRecords[record_key(category)].to_f
Records[record_key(category)].to_f
end

def log_ratio
Expand Down
@@ -1,6 +1,6 @@
# Copyright (c) 2011, SoundCloud Ltd., Nikola Chochkov
module GreenMidget
class GreenMidgetRecords < ActiveRecord::Base
class Records < ActiveRecord::Base
set_table_name :green_midget_records

def self.fetch_all(words = [])
Expand Down
8 changes: 4 additions & 4 deletions lib/tasks/green_midget.rake
Expand Up @@ -8,8 +8,8 @@ namespace :green_midget do
task :active_record => :environment do
include GreenMidget

unless GreenMidgetRecords.table_exists?
CreateGreenMidgetRecords.up
unless Records.table_exists?
CreateRecords.up
end

keys = [ ALTERNATIVE, NULL ].map do |hypothesis|
Expand All @@ -24,8 +24,8 @@ namespace :green_midget do

puts '== Creating records ==='
keys.each { |key|
unless GreenMidgetRecords.find_by_key(key)
GreenMidgetRecords.create(key)
unless Records.find_by_key(key)
Records.create(key)
puts "-- Created #{key}"
end
}
Expand Down
14 changes: 7 additions & 7 deletions spec/base_spec.rb
Expand Up @@ -6,7 +6,7 @@
include GreenMidget

before(:each) do
GreenMidgetRecords.delete_all
Records.delete_all
[
{:key => "#{ Words.prefix }this::#{ ALTERNATIVE }_count", :value => 701.0 },
{:key => "#{ Words.prefix }this::#{ NULL }_count", :value => 11.0 },
Expand Down Expand Up @@ -35,7 +35,7 @@
{:key => "#{ Examples.prefix }email_in_text::#{ ALTERNATIVE }_count", :value => 1000.0 },
{:key => "#{ Examples.prefix }email_in_text::#{ NULL }_count", :value => 1000.0 },
].each do |entry|
GreenMidgetRecords.create(entry)
Records.create(entry)
end
end

Expand Down Expand Up @@ -106,23 +106,23 @@
it "should increase the index counts of the classified words" do
lambda {
Tester.new('zero').classify_as!(NULL)
}.should change { GreenMidgetRecords.find_by_key(Words['zero'].record_key(NULL)).value.to_f }.by(1)
}.should change { Records.find_by_key(Words['zero'].record_key(NULL)).value.to_f }.by(1)
end
it "should increment the learning examples count for all features" do
FEATURES.each do |feature|
lambda {
Tester.new('zero').classify_as!(NULL)
}.should change { GreenMidgetRecords.find_by_key(Examples[feature].record_key(NULL)).value.to_f }.by(1)
}.should change { Records.find_by_key(Examples[feature].record_key(NULL)).value.to_f }.by(1)
end
end
it "should not add new records for known keys" do
a = Tester.new 'stuff unknown sofar'
lambda {
a.classify_as! ALTERNATIVE
}.should change { GreenMidgetRecords.count }.by(3)
}.should change { Records.count }.by(3)
lambda {
a.classify_as! ALTERNATIVE
}.should_not change { GreenMidgetRecords.count }
}.should_not change { Records.count }
end
end

Expand Down Expand Up @@ -150,7 +150,7 @@
# pending('todo')
# end
# it "throw an exception if no training examples were given, but it's asked for classification" do
# # if GreenMidgetRecords.count(ALTERNATIVE) or GreenMidgetRecords.count(NULL) is 0.0 => throw an exception
# # if Records.count(ALTERNATIVE) or Records.count(NULL) is 0.0 => throw an exception
# pending('todo')
# end
# end
Expand Down
38 changes: 19 additions & 19 deletions spec/examples_spec.rb
Expand Up @@ -5,8 +5,8 @@
include GreenMidget

before(:each) do
GreenMidgetRecords.delete_all
GreenMidgetRecords.class_variable_set("@@cache", {})
Records.delete_all
Records.class_variable_set("@@cache", {})
end

describe "#[]()" do
Expand All @@ -17,17 +17,17 @@
end

it "should return the general feature examples if passed a (new) feature key that has no examples yet" do
GreenMidgetRecords.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count", :value => 1000)
GreenMidgetRecords.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ ALTERNATIVE }_count", :value => 1000)
GreenMidgetRecords.find_by_key(Examples.prefix + "new::#{ NULL }_count").should == nil
GreenMidgetRecords.fetch_all
Records.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count", :value => 1000)
Records.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ ALTERNATIVE }_count", :value => 1000)
Records.find_by_key(Examples.prefix + "new::#{ NULL }_count").should == nil
Records.fetch_all
CATEGORIES.each do |category|
Examples['new'][category].should == Examples[Examples::GENERAL_FEATURE_NAME][category]
end
end
it "should return the feature's own example counts if these exist" do
GreenMidgetRecords.create(:key => Examples.prefix + "new::#{ NULL }_count", :value => 3)
GreenMidgetRecords.create(:key => Examples.prefix + "new::#{ ALTERNATIVE }_count", :value => 1)
Records.create(:key => Examples.prefix + "new::#{ NULL }_count", :value => 3)
Records.create(:key => Examples.prefix + "new::#{ ALTERNATIVE }_count", :value => 1)
Examples['new'][NULL].should == 3
end

Expand All @@ -36,34 +36,34 @@
end

it "should throw an error if the general feature examples has a zero spam_count and ham_count" do
GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count")
Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count")
@call_any.should raise_error
end

it "should throw an error if the general feature examples has a zero spam_count or ham_count" do
GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 0)
Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 0)
@call_any.should raise_error
end

it "should not throw an error if both columns are positive" do
GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 2)
GreenMidgetRecords.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ ALTERNATIVE }_count", :value => 1)
Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ NULL }_count", :value => 2)
Records.create(:key => Examples.prefix + "#{ Examples::GENERAL_FEATURE_NAME }::#{ ALTERNATIVE }_count", :value => 1)
@call_any.should_not raise_error
end
end

describe "#probability_for" do
it "should return the probability of a feature falling into category as: Examples[feature][category] / (Examples[feature][ALTERNATIVE] + Examples[feature][NULL])" do
GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(NULL), :value => 1000)
GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 150 )
Records.create(:key => Examples['url_in_text'].record_key(NULL), :value => 1000)
Records.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 150 )
Examples['url_in_text'].probability_for(ALTERNATIVE).should == 150.0/(1000 + 150)
end
end

describe "#no_examples?" do
before(:each) do
GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE))
GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(NULL))
Records.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE))
Records.create(:key => Examples['url_in_text'].record_key(NULL))
@object = Examples['url_in_text']
end

Expand All @@ -72,13 +72,13 @@
end

it "should return true if spam_count or ham_count are zero" do
GreenMidgetRecords.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
Records.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
@object.no_examples?.should be_true
end

it "should should return false if both spam_count and ham_count are positive" do
GreenMidgetRecords.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
GreenMidgetRecords.find_by_key(@object.record_key(ALTERNATIVE)).update_attribute(:value, 1)
Records.find_by_key(@object.record_key(NULL)).update_attribute(:value, 1)
Records.find_by_key(@object.record_key(ALTERNATIVE)).update_attribute(:value, 1)
@object.no_examples?.should be_false
end
end
Expand Down
12 changes: 6 additions & 6 deletions spec/features_spec.rb
Expand Up @@ -5,17 +5,17 @@
include GreenMidget

before(:each) do
GreenMidgetRecords.delete_all
GreenMidgetRecords.class_variable_set("@@cache", {})
Records.delete_all
Records.class_variable_set("@@cache", {})
end

describe "#probability_for" do
it "should return Feature[feature] / Examples[feature]" do
GreenMidgetRecords.create(:key => Features["url_in_text"].record_key(NULL), :value => 20 )
GreenMidgetRecords.create(:key => Features["url_in_text"].record_key(ALTERNATIVE), :value => 10 )
Records.create(:key => Features["url_in_text"].record_key(NULL), :value => 20 )
Records.create(:key => Features["url_in_text"].record_key(ALTERNATIVE), :value => 10 )

GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(NULL), :value => 100 )
GreenMidgetRecords.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 1000)
Records.create(:key => Examples['url_in_text'].record_key(NULL), :value => 100 )
Records.create(:key => Examples['url_in_text'].record_key(ALTERNATIVE), :value => 1000)

Features['url_in_text'].probability_for(NULL).should == 20.0/100
Features['url_in_text'].probability_for(ALTERNATIVE).should == 10.0/1000
Expand Down
66 changes: 33 additions & 33 deletions spec/green_midget_records_spec.rb
@@ -1,64 +1,64 @@
# Copyright (c) 2011, SoundCloud Ltd., Nikola Chochkov
require 'spec_helper'

describe GreenMidget::GreenMidgetRecords do
describe GreenMidget::Records do
include GreenMidget

before(:each) do
GreenMidgetRecords.delete_all
Records.delete_all
end

describe "#[]()" do
it "should take words from data store if not found in the cache" do
word_key, phrase_key = [ 'word', 'phrase' ].map { |w| Words[w].record_key(NULL) }
GreenMidgetRecords.fetch_all([ 'word' ])
GreenMidgetRecords.create(:key => phrase_key)
GreenMidgetRecords.find_by_key(word_key).should == nil
GreenMidgetRecords.find_by_key(phrase_key).should_not == nil
GreenMidgetRecords[phrase_key].should == ''
Records.fetch_all([ 'word' ])
Records.create(:key => phrase_key)
Records.find_by_key(word_key).should == nil
Records.find_by_key(phrase_key).should_not == nil
Records[phrase_key].should == ''
end
it "should add a {key => ''} to the cache if key not found in cache and in the data store" do
key = Words['nonexisting'].record_key(NULL)
GreenMidgetRecords[key].should == ''
GreenMidgetRecords.find_by_key(key).should == nil
Records[key].should == ''
Records.find_by_key(key).should == nil
end
end

describe "#fetch_all" do
it "should empty cache before fetching" do
bar_key = Words['bar'].record_key(ALTERNATIVE)
GreenMidgetRecords.fetch_all([ 'foo', 'bar' ])
GreenMidgetRecords.class_variable_get("@@cache").key?(bar_key).should be_true
GreenMidgetRecords.fetch_all([ 'foo', 'newbar' ])
GreenMidgetRecords.class_variable_get("@@cache").key?(bar_key).should be_false
Records.fetch_all([ 'foo', 'bar' ])
Records.class_variable_get("@@cache").key?(bar_key).should be_true
Records.fetch_all([ 'foo', 'newbar' ])
Records.class_variable_get("@@cache").key?(bar_key).should be_false
end
it "does a multi get on all words and keys" do
cache = GreenMidgetRecords.fetch_all([ 'foo', 'bar' ])
cache['foo'].should.eql? GreenMidgetRecords.class_eval{new(:key => 'foo')}
cache = Records.fetch_all([ 'foo', 'bar' ])
cache['foo'].should.eql? Records.class_eval{new(:key => 'foo')}
end
it "should fetch the system keys along with the given words" do
key = Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count"
GreenMidgetRecords.create(:key => key)
GreenMidgetRecords.fetch_all([])
cache = GreenMidgetRecords.class_variable_get("@@cache")
Records.create(:key => key)
Records.fetch_all([])
cache = Records.class_variable_get("@@cache")
cache.key?(key).should be_true
cache.count.should == 1
end
it "words with zero examples or no record in the database should be present in the cache" do
GreenMidgetRecords.create(:key => Words['kotoba'].record_key(NULL))
GreenMidgetRecords.fetch_all(['kotoba'])
GreenMidgetRecords.class_variable_get("@@cache").key?(Words['kotoba'].record_key(ALTERNATIVE)).should be_true
GreenMidgetRecords.create(:key => Words['mouichidou'].record_key(NULL), :value => 0)
GreenMidgetRecords.create(:key => Words['mouichidou'].record_key(ALTERNATIVE), :value => 3)
GreenMidgetRecords.fetch_all(['mouichidou'])
GreenMidgetRecords.class_variable_get("@@cache")[Words['mouichidou'].record_key(NULL)].should_not == nil
GreenMidgetRecords.class_variable_get("@@cache")[Words['mouichidou'].record_key(ALTERNATIVE)].should_not == nil
Records.create(:key => Words['kotoba'].record_key(NULL))
Records.fetch_all(['kotoba'])
Records.class_variable_get("@@cache").key?(Words['kotoba'].record_key(ALTERNATIVE)).should be_true
Records.create(:key => Words['mouichidou'].record_key(NULL), :value => 0)
Records.create(:key => Words['mouichidou'].record_key(ALTERNATIVE), :value => 3)
Records.fetch_all(['mouichidou'])
Records.class_variable_get("@@cache")[Words['mouichidou'].record_key(NULL)].should_not == nil
Records.class_variable_get("@@cache")[Words['mouichidou'].record_key(ALTERNATIVE)].should_not == nil
end
it "the cache should be a hash; its keys should be strings" do
GreenMidgetRecords.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count")
GreenMidgetRecords.create(:key => Features.prefix + "url_in_text::#{ NULL }_count")
GreenMidgetRecords.fetch_all([])
cache = GreenMidgetRecords.class_variable_get("@@cache")
Records.create(:key => Examples.prefix + Examples::GENERAL_FEATURE_NAME + "::#{ NULL }_count")
Records.create(:key => Features.prefix + "url_in_text::#{ NULL }_count")
Records.fetch_all([])
cache = Records.class_variable_get("@@cache")
cache.class.should.eql? Hash
cache.count.should == 2
cache.keys.each do |key|
Expand All @@ -70,11 +70,11 @@
describe "#increment" do
it "should increment counts first in cache and write! to store only if explicitly called" do
record_key = Words['stuff'].record_key(NULL)
GreenMidgetRecords.create(:key => record_key)
Records.create(:key => record_key)

lambda {
GreenMidgetRecords.increment(record_key)
}.should change { GreenMidgetRecords.find_by_key(record_key).value.to_f }.by(1)
Records.increment(record_key)
}.should change { Records.find_by_key(record_key).value.to_f }.by(1)
end
end
end
2 changes: 1 addition & 1 deletion spec/tester.rb
Expand Up @@ -11,7 +11,7 @@ def words
end

def log_ratio
GreenMidgetRecords.fetch_all(words)
Records.fetch_all(words)
super
end
end

0 comments on commit 4de908b

Please sign in to comment.