Skip to content

Commit

Permalink
+ preparing big interface rewrite for 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
floere committed Nov 8, 2010
1 parent 4c531e1 commit 4ecc6e9
Show file tree
Hide file tree
Showing 13 changed files with 134 additions and 66 deletions.
2 changes: 1 addition & 1 deletion server/lib/picky/application.rb
Expand Up @@ -25,7 +25,7 @@ def default_querying options = {}
#
# TODO Rename category.
#
# Configuration DSL: forward field/location definitions to the indexing
# configuration. (The old `delegate :field` line without :location was
# leftover and has been removed.)
#
# NOTE(review): :location is delegated, but Configuration::Indexes#location
# appears to be commented out in this commit — calling it will raise.
#
# TODO Rename category.
#
delegate :field, :location, :to => :indexing
def category *args; indexing.field *args; end
def index *args; indexing.define_index *args; end

Expand Down
5 changes: 4 additions & 1 deletion server/lib/picky/configuration/field.rb
Expand Up @@ -4,7 +4,7 @@ module Configuration
# (title is a category of a books index, for example).
#
class Field
attr_reader :name, :indexed_name, :virtual, :tokenizer
attr_reader :name, :indexed_name, :virtual, :tokenizer, :source
attr_accessor :type # convenience TODO Still needed?
def initialize name, options = {}
@name = name.to_sym
Expand Down Expand Up @@ -33,6 +33,9 @@ def generate_qualifiers_from options
# The field's data source: an explicitly configured @source wins,
# otherwise the source of the owning type is used.
def source
  @source || type.source
end
# Builds the runtime category for this field, bound to the GIVEN type
# (rather than the field's own #type, as #generate does).
def generate_with type
  Index::Category.new self.name, type, @options
end
# Builds the runtime category using the field's own #type.
# NOTE(review): same construction as #generate_with — presumably kept for
# callers that do not pass a type explicitly; confirm before removing either.
def generate
  Index::Category.new self.name, type, @options
end
Expand Down
25 changes: 24 additions & 1 deletion server/lib/picky/configuration/indexes.rb
Expand Up @@ -17,17 +17,40 @@ def default_tokenizer options = {}
# TODO Rewrite all this configuration handling.
#
# Defines and globally registers a new index type.
#
# name   - the index name (Symbol)
# source - the data source the index harvests from
# fields - optional Configuration::Field instances (may be empty in the
#          reworked API, where categories are added to the result later)
#
# Returns the generated index (also registered with ::Indexes).
#
# TODO Make type, append fields?
#
def define_index name, source, *fields
  new_type = Type.new name, source, *fields
  types << new_type
  ::Indexes.configuration ||= self

  # (A duplicated `generated = new_type.generate` leftover line was removed.)
  generated = new_type.generate
  ::Indexes.add generated
  generated
end
# Convenience factory: builds a Configuration::Field for the given
# name and options.
def field name, options = {}
  Field.new(name, options)
end
# def location name, options = {}
# p name, options
# # TODO Ugly. Rewrite.
# #
# grid = options.delete :grid
# precision = options.delete :precision
#
# new_field = field name, options
#
# class << new_field
#
# def type= v
# @type = v
# old_source = self.source
# self.source = Sources::Wrappers::Location.new old_source, grid:grid, precision:precision
# end
#
# end
#
# new_field
# end

#
#
Expand Down
16 changes: 8 additions & 8 deletions server/lib/picky/configuration/type.rb
Expand Up @@ -7,25 +7,25 @@ class Type
:result_type,
:ignore_unassigned_tokens,
:solr
# Configuration for one index type.
#
# name    - the index name (Symbol)
# source  - the data source for this type
# options - :after_indexing,
#           :result_type (defaults to name),
#           :ignore_unassigned_tokens (defaults to false)
#
# Fields are no longer passed to the constructor in the reworked API;
# categories are attached later. The leftover old constructor (taking
# *fields and the Configuration::Field coercion) has been removed, and
# `options` gets a default so `Type.new name, source` (as define_index
# does with no fields) keeps working.
#
def initialize name, source, options = {}
  @name = name
  @source = source

  @after_indexing = options[:after_indexing]
  @result_type = options[:result_type] || name
  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
  # @solr = options[:solr] || nil
end
# Builds the runtime Index::Type for this configuration type.
#
# Categories are no longer generated here (the reworked initializer never
# sets @fields, so the leftover `fields.map { ... }` residue would raise);
# they are added to the returned index type later via Index::Type#category.
#
def generate
  Index::Type.new name, result_type, ignore_unassigned_tokens
end
def take_snapshot
source.take_snapshot self
Expand Down
13 changes: 8 additions & 5 deletions server/lib/picky/index/type.rb
Expand Up @@ -15,11 +15,8 @@ class Type
# A runtime index type.
#
# name                     - index name
# result_type              - TODO Move.
# ignore_unassigned_tokens - forwarded to the combinator
# categories               - initial categories, kept for each_delegate
#
# The leftover combinator_for helper was removed: it passed the (removed)
# categories positional argument to the reworked Query::Combinator
# constructor, which now only takes an options hash.
#
def initialize name, result_type, ignore_unassigned_tokens, *categories
  @name = name
  @result_type = result_type # TODO Move.
  @categories = categories # for each_delegate. TODO Use a real Index::Categories object.
  @combinator = Query::Combinator.new ignore_unassigned_tokens: ignore_unassigned_tokens
end

#
Expand All @@ -28,6 +25,12 @@ def possible_combinations token
@combinator.possible_combinations_for token
end

# Adds a category to this index type.
#
# name_or_category - either an already configured Configuration::Field
#                    (dup-ed so a shared field can be reused across types)
#                    or a category name from which a new field is built
#                    with the given options.
#
# Returns the result of registering the generated category.
#
def category name_or_category, options = {}
  # Fixed: the new-field branch previously used `name` (this index type's
  # own name) instead of the passed name_or_category.
  category_to_be_added = Configuration::Field === name_or_category ? name_or_category.dup : Configuration::Field.new(name_or_category, options)
  generated_category = category_to_be_added.generate_with self
  # NOTE(review): was bare `combinator.add`, but no reader for @combinator
  # is visible in this file — use the instance variable directly.
  @combinator.add generated_category
end

end

end
2 changes: 2 additions & 0 deletions server/lib/picky/indexers/base.rb
Expand Up @@ -55,7 +55,9 @@ def process
File.open(search_index_file_name, 'w:binary') do |file|
result = []
source.harvest(@type, @field) do |indexed_id, text|
p [indexed_id, text] if [1,2,3,4,5].include?(indexed_id)
tokenizer.tokenize(text).each do |token_text|
p token_text if [1,2,3,4,5].include?(indexed_id)
result << indexed_id << comma << token_text << newline
end
file.write(result.join) && result.clear if result.size > 100_000
Expand Down
15 changes: 12 additions & 3 deletions server/lib/picky/query/combinator.rb
Expand Up @@ -7,17 +7,26 @@ class Combinator
attr_reader :categories, :category_hash
attr_reader :ignore_unassigned_tokens # TODO Should this actually be determined by the query? Probably, yes.

# Combines tokens with categories.
#
# Starts out with no categories; they are registered later via #add.
# (The leftover old constructor header taking `categories` — left
# unterminated above the new definition — has been removed.)
#
# options - :ignore_unassigned_tokens (defaults to false)
#
def initialize options = {}
  @categories = []
  @category_hash = {}

  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
end

# TODO Spec.
#
# Registers a category so it participates in combination lookup:
# appended to the ordered list and indexed by name in the hash.
#
def add category
  categories.push category
  category_hash[category.name] = [category] # TODO An array seems silly.
end

# TODO Move somewhere else.
#
# TODO Or use active_support's?
#
# TODO Remove.
#
def hashify category_array
category_array.inject({}) do |hash, category|
hash[category.name] = [category]
Expand Down
16 changes: 10 additions & 6 deletions server/lib/picky/sources/wrappers/location.rb
Expand Up @@ -6,13 +6,15 @@ class Location < Base

attr_reader :precision, :grid

# TODO Save min and grid!
#
# Wraps a backend source, mapping continuous location values onto a grid.
#
# backend - the wrapped source
# options - :grid (user-visible grid size), :precision
#
def initialize backend, options = {}
  super backend

  @user_grid = extract_user_grid options
  @precision = extract_precision options

  # Internal grid is finer than the user grid; higher precision shrinks it
  # further. (A dead leftover `@grid = @user_grid * 0.666` that was
  # immediately overwritten has been removed.)
  @grid = @user_grid / (@precision + 0.5)
end

#
Expand All @@ -33,8 +35,7 @@ def extract_precision options
end

# Resets the gathered location bounds before a harvest pass.
#
# NOTE(review): @min is assigned twice — the final value is +Infinity, so
# the first (-Infinity) assignment is dead; @max also ends up at +Infinity,
# which means `location > @max` during harvest can never fire even though
# @max is still read by harvest and marginize. This looks like interleaved
# old/new lines from a rewrite — confirm the intended bounds before cleanup.
#
def reset
  @min = -1.0/0
  @max = 1.0/0
  @min = 1.0/0
end

# Yield the data (id, text for id) for the given type and field.
Expand All @@ -49,25 +50,28 @@ def harvest type, field
# Gather min/max.
#
backend.harvest type, field do |indexed_id, location|
location = location.to_f
@min = location if location < @min
@max = location if location > @max
locations << [indexed_id, location]
end

# Add a margin.
#
marginize

# Recalculate locations.
#
locations.each do |indexed_id, location|
locations_for(location).each do |new_location|
yield indexed_id, new_location
yield indexed_id, new_location.to_s
end
end

p [@min, @grid]
end

# Widens the gathered [@min, @max] range by one user-grid step on each
# side, so locations at the edges still land on the grid.
#
def marginize
  @min = @min - @user_grid
  @max = @max + @user_grid
end

# Put location onto multiple places on a grid.
Expand Down
5 changes: 4 additions & 1 deletion server/lib/picky/tokenizers/index.rb
Expand Up @@ -61,8 +61,11 @@ def token_for text
#
# Override in subclasses to redefine behaviour.
#
# TODO Make parametrizable! reject { |token| }
#
# Token rejection hook: filters tokens before indexing.
#
# The default now keeps every token unchanged — the leftover active
# `tokens.reject! { |token| token.to_s.size < 2 }` line has been removed;
# it contradicted the commented-out copy below it and this commit's own
# spec, which requires the single-character token '7' to survive
# tokenization. Override in subclasses to reject tokens.
#
def reject tokens
  tokens
  # tokens.reject! { |token| token.to_s.size < 2 }
end

end
Expand Down
21 changes: 10 additions & 11 deletions server/project_prototype/app/application.rb
Expand Up @@ -24,17 +24,16 @@ class PickySearch < Application

# Define an index. Use a database etc. source? http://github.com/floere/picky/wiki/Sources-Configuration#sources
#
# Define an index over the library CSV. Categories are added one call at a
# time on the index object: Index::Type#category returns the registered
# category, NOT the index, so the calls must not be chained (the leftover
# chained `.category(...)` form would invoke #category on the wrong
# receiver, and leading-dot continuations are not valid 1.8 syntax).
# The leftover old fields-in-constructor definition was also removed.
books_index = index(:books, Sources::CSV.new(:title, :author, :isbn, :year, :publisher, :subjects, file: 'app/library.csv'))
books_index.category(:title,
                     similarity: Similarity::Phonetic.new(3), # Up to three similar title words indexed (default: no similarity).
                     partial: Partial::Substring.new(from: 1)) # Indexes substrings upwards from character 1 (default: -3),
                                                               # so "picky" is found even when entering just "p".
books_index.category(:author,
                     partial: Partial::Substring.new(from: 1))
books_index.category(:isbn,
                     partial: Partial::None.new) # Partial substring search on an ISBN makes little
                                                 # sense, neither does similarity.

query_options = { :weights => { [:title, :author] => +3, [:title] => +1 } } # +/- points for ordered combinations.

Expand Down
20 changes: 18 additions & 2 deletions server/spec/lib/sources/wrappers/location_spec.rb
Expand Up @@ -36,10 +36,26 @@
end
context "with grid and precision option" do
# Leftover old setup (grid:10, precision:3) and its matching expectation
# (== 3) were removed; they contradicted the new setup on the lines
# directly below them and would make the example fail.
before(:each) do
  @wrapper = Sources::Wrappers::Location.new @backend, grid:4, precision:2
end
it "uses the given precision" do
  @wrapper.precision.should == 2
end

describe "locations_for" do
before(:each) do
@wrapper.instance_variable_set :@min, -3
@wrapper.marginize
end
it "returns the right array" do
@wrapper.locations_for(17).should == [13, 14, 15, 16, 17] # TODO Correct?
end
it "returns the right array" do
@wrapper.locations_for(-3).should == [0, 1, 2, 3, 4]
end
it "returns the right array" do
@wrapper.locations_for(20).should == [14, 15, 16, 17, 18]
end
end
end
end
Expand Down
1 change: 1 addition & 0 deletions server/spec/lib/tokenizers/index_spec.rb
Expand Up @@ -68,6 +68,7 @@ def self.it_should_tokenize_token(text, expected)
#
it_should_tokenize_token "splitting on \\s", [:splitting, :on, :"\\s"]
it_should_tokenize_token 'und', [:und]
it_should_tokenize_token '7', [:'7']
end
end

Expand Down
59 changes: 32 additions & 27 deletions server/test_project/app/application.rb
Expand Up @@ -29,33 +29,31 @@ class BookSearch < Application
year = category :year, :qualifiers => [:y, :year, :annee]
isbn = category :isbn, :qualifiers => [:i, :isbn]

# Main index: categories are attached one by one in the reworked API.
# The leftover old-style definition (passing similar_title/author/year
# directly to `index`) was removed — it used the removed
# fields-in-constructor signature and was immediately shadowed anyway.
main_index = index :main, Sources::DB.new('SELECT id, title, author, year FROM books', :file => 'app/db.yml')
main_index.category similar_title
main_index.category author
main_index.category year

# ISBN index. The leftover old-style definition (passing a field to
# `index`) was removed for the same reason as the main index above.
isbn_index = index :isbn, Sources::DB.new("SELECT id, isbn FROM books", :file => 'app/db.yml')
isbn_index.category :isbn, :qualifiers => [:i, :isbn]

# geo_index = index :geo,
# Sources::CSV.new(:location, :north, :east, :file => 'data/locations.csv'),
# category(:location),
# geo_location(:north, grid: 10_000),
# geo_location(:east, grid: 10_000)
# # geo_location(:north, grid: 20_000, :as => :n20k),
# # geo_location(:east, grid: 20_000, :as => :e20k)
geo_index = index :geo, Sources::CSV.new(:location, :north, :east, :file => 'data/locations.csv')
# Fixed: was `geo_index.category :location),` — stray closing paren and
# trailing comma, a syntax error.
geo_index.category :location
# NOTE(review): `source` below is not defined in the visible scope —
# presumably it should reference the geo index's CSV source; confirm.
geo_index.category :north, :source => Sources::Wrappers::Location.new(source, grid:2), :tokenizer => Tokenizers::Index.new
geo_index.category :east, :source => Sources::Wrappers::Location.new(source, grid:2), :tokenizer => Tokenizers::Index.new
# geo_index.location :north, grid: 2 # TODO partial does not make sense!
# geo_index.location :east, grid: 2
# geo_location(:north, grid: 20_000, :as => :n20k),
# geo_location(:east, grid: 20_000, :as => :e20k)

# CSV test index. A leftover second old-style assignment of csv_test_index
# (passing fields and category(...) calls to `index`) was removed: it used
# the removed fields-in-constructor API and overwrote this definition.
csv_test_index = index :csv_test, Sources::CSV.new(:title,:author,:isbn,:year,:publisher,:subjects, :file => 'data/books.csv')
csv_test_index.category similar_title
csv_test_index.category author
csv_test_index.category isbn
csv_test_index.category year
csv_test_index.category :publisher, :qualifiers => [:p, :publisher]
csv_test_index.category :subjects, :qualifiers => [:s, :subject]


options = {
:weights => {
Expand All @@ -71,16 +69,23 @@ class BookSearch < Application
full_csv = Query::Full.new csv_test_index, options
live_csv = Query::Live.new csv_test_index, options

# NOTE(review): full_isbn/live_isbn were each assigned twice (leftover old
# assignments passing `options`); only the surviving, options-less pair is
# kept — confirm dropping `options` for the ISBN queries was intended.
full_isbn = Query::Full.new isbn_index
live_isbn = Query::Live.new isbn_index

full_geo = Query::Full.new geo_index
live_geo = Query::Live.new geo_index

# URL routing. Fixed: %r{^/geo/live} appeared twice as a hash key (the
# second mapping silently overwrote the first), and the /isbn/live route
# was missing although live_isbn is defined above.
route %r{^/books/full} => full_main,
      %r{^/books/live} => live_main,

      %r{^/csv/full} => full_csv,
      %r{^/csv/live} => live_csv,

      %r{^/isbn/full} => full_isbn,
      %r{^/isbn/live} => live_isbn,

      %r{^/geo/full} => full_geo,
      %r{^/geo/live} => live_geo,

      %r{^/all/full} => Query::Full.new(main_index, csv_test_index, isbn_index, geo_index, options),
      %r{^/all/live} => Query::Live.new(main_index, csv_test_index, isbn_index, geo_index, options)

Expand Down

0 comments on commit 4ecc6e9

Please sign in to comment.