Skip to content

Commit

Permalink
! analyzer, + phonetics, + category doc
Browse files Browse the repository at this point in the history
  • Loading branch information
floere committed Dec 16, 2011
1 parent cced154 commit 3a232d9
Show file tree
Hide file tree
Showing 13 changed files with 65 additions and 32 deletions.
16 changes: 9 additions & 7 deletions server/lib/picky/analyzer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def analyze bundle
end
def cardinality identifier, index
return if index.size.zero?
return unless index.respond_to?(:each_pair)

key_length_average = 0
ids_length_average = 0
Expand All @@ -49,7 +50,7 @@ def cardinality identifier, index
min_key_length = 1.0/0 # Infinity
max_key_length = 0

key_size, ids_size =
key_size, ids_size = 0, 0
index.each_pair do |key, ids|
key_size = key.size
if key_size < min_key_length
Expand Down Expand Up @@ -91,7 +92,8 @@ def index_analysis
end
end
def weights index
return if index.size.zero?
return if !index.respond_to?(:size) || index.size.zero?
return unless index.respond_to?(:each_pair)

min_weight = 1.0/0 # Infinity
max_weight = 0.0
Expand Down Expand Up @@ -131,11 +133,11 @@ def to_s
end
def index_to_s
  # Renders the index part of the analysis as a newline-separated report.
  #
  # Returns nil when the analysis recorded zero keys. Otherwise returns a
  # String that always contains the key cardinality line and, only if an
  # :index analysis is present, the key length and ids-per-key statistics.
  #
  return if analysis[:__keys].zero?

  lines = ["index key cardinality: #{"%10d" % analysis[:__keys]}"]

  # The detailed statistics are optional: without an :index entry there is
  # nothing to dereference, so only the cardinality line is reported.
  #
  stats = analysis[:index]
  if stats
    lines << "index key length range (avg): #{"%10s" % stats[:key_length]} (#{stats[:key_length_average].round(2)})"
    lines << "index ids per key length range (avg): #{"%10s" % stats[:ids_length]} (#{stats[:ids_length_average].round(2)})"
  end

  lines.join "\n"
end
def weights_to_s
return unless analysis[:weights]
Expand Down
6 changes: 6 additions & 0 deletions server/lib/picky/backends/file/json.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ def clear
self.mapping.clear
end

# Size of the index is equal to the mapping size.
#
def size
  # The backend's size is whatever the in-memory mapping reports as its size.
  current_mapping = self.mapping
  current_mapping.size
end

# Loads the mapping hash from json format.
#
def load
Expand Down
9 changes: 9 additions & 0 deletions server/lib/picky/backends/redis/list.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,15 @@ def clear
end
end

# Size of the list(s).
#
def size
  # Adds up the ZCARD of every key matching "<namespace>:*".
  # Returns 0 when no key matches.
  #
  # NOTE(review): this relies on the KEYS command with a glob pattern;
  # confirm that is acceptable for the keyspace sizes in use.
  #
  pattern = "#{namespace}:*"
  cardinalities = client.keys(pattern).map { |list_key| client.zcard(list_key) }
  cardinalities.inject(0) { |sum, cardinality| sum + cardinality }
end

# Deletes the list for the key.
#
def delete key
Expand Down
6 changes: 6 additions & 0 deletions server/lib/picky/backends/redis/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ def clear
client.del namespace
end

# Returns the size of the hash.
#
def size
  # Asks the client for the HLEN of the key named by the namespace.
  redis = client
  redis.hlen(namespace)
end

# Deletes the single value.
#
def delete key
Expand Down
5 changes: 5 additions & 0 deletions server/lib/picky/backends/sqlite/array.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ def create_table
db.execute 'create table key_value (key varchar(255), value text);'
end

def size
  # Counts the rows of the key_value table; the count is read from the
  # first column of the first returned row and coerced to an Integer.
  rows = db.execute('SELECT COUNT(*) FROM key_value')
  count_row = rows.first
  count_row.first.to_i
end

def []= key, array
unless array.empty?
db.execute 'INSERT OR REPLACE INTO key_value (key,value) VALUES (?,?)',
Expand Down
7 changes: 1 addition & 6 deletions server/lib/picky/bundle.rb
Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,8 @@ def create_backends
# the strategy itself pretends to be an index.
#
def initialize_backends
# @inverted, @weights, @similarity, @configuration, @realtime = backend.initial weight_strategy
@inverted = @backend_inverted.initial
# TODO @weights = @weight_strategy.initial || @backend_weights.initial
#
@weights = @weight_strategy.saved?? @backend_weights.initial : @weight_strategy
@weights = @weight_strategy.saved? ? @backend_weights.initial : @weight_strategy
@similarity = @backend_similarity.initial
@configuration = @backend_configuration.initial
@realtime = @backend_realtime.initial
Expand All @@ -106,8 +103,6 @@ def initialize_backends
#
def empty
@inverted = @backend_inverted.empty
# THINK about this. Perhaps the strategies should implement the backend methods?
#
@weights = @weight_strategy.saved? ? @backend_weights.empty : @weight_strategy
@similarity = @backend_similarity.empty
@configuration = @backend_configuration.empty
Expand Down
4 changes: 1 addition & 3 deletions server/lib/picky/bundle_indexed.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ class Bundle
#
def ids sym_or_string
@inverted[sym_or_string] || []
# TODO ?
# THINK Place the key_format conversion here – or move into the backend?
#
# if @key_format
# class << self
Expand Down Expand Up @@ -79,8 +79,6 @@ def load_inverted
# Loads the weights index.
#
def load_weights
  # Only a weight strategy that reports itself as saved has weights to
  # load from the backend; otherwise the current weights stay untouched.
  #
  return unless @weight_strategy.saved?

  self.weights = @backend_weights.load
end
# Loads the similarity index.
Expand Down
2 changes: 2 additions & 0 deletions server/lib/picky/bundle_realtime.rb
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def add_similarity str_or_sym, where = :unshift
#
similars.delete str_or_sym if similars.include? str_or_sym
similars << str_or_sym

self.similarity_strategy.prioritize similars, str_or_sym
end
end

Expand Down
9 changes: 6 additions & 3 deletions server/lib/picky/category.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@ class Category
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
# * from: The source category identifier to take the data from.
# * key_format: What this category's keys are formatted with (default is :to_i)
# * backend: The backend to use. Default is Backends::Memory.new.
# Other options are: Backends::Redis.new, Backends::SQLite.new, Backends::File.new.
# * qualifiers: Which qualifiers can be used to predefine the category. E.g. "title:bla".
#
# Advanced Options:
# * source: Use if the category should use a different source.
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
# * weight: Query::Weights.new( [:category1, :category2] => +2, ... )
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
# * key_format: What this category's keys are formatted with (default is :to_i)
# * use_symbols: Whether to use symbols internally instead of strings.
# * source: Use if the category should use a different source.
#
def initialize name, index, options = {}
@name = name
Expand Down
6 changes: 2 additions & 4 deletions server/lib/picky/generators/similarity/phonetic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,9 @@ def check_gem # :nodoc:

# Sorts the index values in place.
#
# Not used currently.
#
def prioritize! ary, code
def prioritize ary, code
ary.sort_by_levenshtein! code
ary.slice! amount, ary.size # THINK size is not perfectly correct, but anyway
ary.slice! amount, ary.size # Note: The ary.size is not perfectly correct.
end

end
Expand Down
3 changes: 2 additions & 1 deletion server/lib/picky/interfaces/live_parameters/master_child.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ def extract_configuration
}
end

# TODO Move to Interface object.
# THINK What to do about this? Standardize the tokenizer interface,
# then access each individual tokenizer.
#
def querying_removes_characters
regexp = Tokenizer.searching.instance_variable_get :@removes_characters_regexp
Expand Down
2 changes: 1 addition & 1 deletion server/spec/lib/bundle_indexing_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
bundle.similar(:dragon).should == [:dargon]
end
it 'returns the right similars' do
bundle.similar(:trkn).should == [:dragon, :dargon]
bundle.similar(:trkn).should == [:dargon, :dragon]
end
it 'performs' do
performance_of { bundle.similar(:dragon) }.should < 0.000075
Expand Down
22 changes: 15 additions & 7 deletions server/spec/lib/generators/similarity/phonetic_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,53 +21,61 @@
instance.send :initialize
end

describe 'prioritize!' do
describe 'prioritize' do
let(:phonetic) { described_class.allocate }
it 'sorts correctly' do
phonetic.instance_variable_set :@amount, 2

ary = [:a, :b, :c]
phonetic.prioritize! ary, :b
phonetic.prioritize ary, :b

ary.should == [:b, :a]
end
it 'sorts correctly' do
phonetic.instance_variable_set :@amount, 2

ary = [:aaa, :aa, :aaaa]
phonetic.prioritize! ary, :aaa
phonetic.prioritize ary, :aaa

ary.should == [:aaa, :aa]
end
it 'sorts correctly' do
phonetic.instance_variable_set :@amount, 3

ary = [:aaa, :aa, :aaaa]
phonetic.prioritize! ary, :aaa
phonetic.prioritize ary, :aaa

ary.should == [:aaa, :aa, :aaaa]
end
it 'sorts correctly' do
phonetic.instance_variable_set :@amount, 3

ary = [:aaaaa, :aa, :aaaa]
phonetic.prioritize! ary, :aaa
phonetic.prioritize ary, :aaa

ary.should == [:aaaa, :aa, :aaaaa]
end
it 'sorts correctly' do
phonetic.instance_variable_set :@amount, 3

ary = [:aaaaa, :aa]
phonetic.prioritize! ary, :aaa
phonetic.prioritize ary, :aaa

ary.should == [:aa, :aaaaa]
end
it 'sorts correctly' do
phonetic.instance_variable_set :@amount, 3

ary = [:aaa]
phonetic.prioritize! ary, :aaa
phonetic.prioritize ary, :aaa

ary.should == [:aaa]
end
it 'sorts correctly' do
phonetic.instance_variable_set :@amount, 1

ary = [:a, :aa, :aaa]
phonetic.prioritize ary, :aaa

ary.should == [:aaa]
end
Expand Down

0 comments on commit 3a232d9

Please sign in to comment.