Skip to content

Commit

Permalink
+ preparing big interface rewrite for 1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
floere committed Nov 8, 2010
1 parent 4c531e1 commit 4ecc6e9
Show file tree
Hide file tree
Showing 13 changed files with 134 additions and 66 deletions.
2 changes: 1 addition & 1 deletion server/lib/picky/application.rb
Expand Up @@ -25,7 +25,7 @@ def default_querying options = {}
#
# TODO Rename category.
#
# Configuration DSL: forward field/location definitions to the indexing
# configuration. (The old `delegate :field` line without :location was
# leftover and has been removed.)
#
# NOTE(review): :location is delegated, but Configuration::Indexes#location
# appears to be commented out in this commit — calling it will raise.
#
# TODO Rename category.
#
delegate :field, :location, :to => :indexing
def category *args; indexing.field *args; end
def index *args; indexing.define_index *args; end

Expand Down
5 changes: 4 additions & 1 deletion server/lib/picky/configuration/field.rb
Expand Up @@ -4,7 +4,7 @@ module Configuration
# (title is a category of a books index, for example).
#
class Field
attr_reader :name, :indexed_name, :virtual, :tokenizer
attr_reader :name, :indexed_name, :virtual, :tokenizer, :source
attr_accessor :type # convenience TODO Still needed?
def initialize name, options = {}
@name = name.to_sym
Expand Down Expand Up @@ -33,6 +33,9 @@ def generate_qualifiers_from options
# The field's data source: an explicitly configured @source wins,
# otherwise the source of the owning type is used.
def source
  @source || type.source
end
# Builds the runtime category for this field, bound to the GIVEN type
# (rather than the field's own #type, as #generate does).
def generate_with type
  Index::Category.new self.name, type, @options
end
# Builds the runtime category using the field's own #type.
# NOTE(review): same construction as #generate_with — presumably kept for
# callers that do not pass a type explicitly; confirm before removing either.
def generate
  Index::Category.new self.name, type, @options
end
Expand Down
25 changes: 24 additions & 1 deletion server/lib/picky/configuration/indexes.rb
Expand Up @@ -17,17 +17,40 @@ def default_tokenizer options = {}
# TODO Rewrite all this configuration handling.
#
# Defines and globally registers a new index type.
#
# name   - the index name (Symbol)
# source - the data source the index harvests from
# fields - optional Configuration::Field instances (may be empty in the
#          reworked API, where categories are added to the result later)
#
# Returns the generated index (also registered with ::Indexes).
#
# TODO Make type, append fields?
#
def define_index name, source, *fields
  new_type = Type.new name, source, *fields
  types << new_type
  ::Indexes.configuration ||= self

  # (A duplicated `generated = new_type.generate` leftover line was removed.)
  generated = new_type.generate
  ::Indexes.add generated
  generated
end
# Convenience factory: builds a Configuration::Field for the given
# name and options.
def field name, options = {}
  Field.new(name, options)
end
# def location name, options = {}
# p name, options
# # TODO Ugly. Rewrite.
# #
# grid = options.delete :grid
# precision = options.delete :precision
#
# new_field = field name, options
#
# class << new_field
#
# def type= v
# @type = v
# old_source = self.source
# self.source = Sources::Wrappers::Location.new old_source, grid:grid, precision:precision
# end
#
# end
#
# new_field
# end

#
#
Expand Down
16 changes: 8 additions & 8 deletions server/lib/picky/configuration/type.rb
Expand Up @@ -7,25 +7,25 @@ class Type
:result_type,
:ignore_unassigned_tokens,
:solr
# Configuration for one index type.
#
# name    - the index name (Symbol)
# source  - the data source for this type
# options - :after_indexing,
#           :result_type (defaults to name),
#           :ignore_unassigned_tokens (defaults to false)
#
# Fields are no longer passed to the constructor in the reworked API;
# categories are attached later. The leftover old constructor (taking
# *fields and the Configuration::Field coercion) has been removed, and
# `options` gets a default so `Type.new name, source` (as define_index
# does with no fields) keeps working.
#
def initialize name, source, options = {}
  @name = name
  @source = source

  @after_indexing = options[:after_indexing]
  @result_type = options[:result_type] || name
  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query?
  # @solr = options[:solr] || nil
end
# Builds the runtime Index::Type for this configuration type.
#
# Categories are no longer generated here (the reworked initializer never
# sets @fields, so the leftover `fields.map { ... }` residue would raise);
# they are added to the returned index type later via Index::Type#category.
#
def generate
  Index::Type.new name, result_type, ignore_unassigned_tokens
end
def take_snapshot
source.take_snapshot self
Expand Down
13 changes: 8 additions & 5 deletions server/lib/picky/index/type.rb
Expand Up @@ -15,11 +15,8 @@ class Type
# A runtime index type.
#
# name                     - index name
# result_type              - TODO Move.
# ignore_unassigned_tokens - forwarded to the combinator
# categories               - initial categories, kept for each_delegate
#
# The leftover combinator_for helper was removed: it passed the (removed)
# categories positional argument to the reworked Query::Combinator
# constructor, which now only takes an options hash.
#
def initialize name, result_type, ignore_unassigned_tokens, *categories
  @name = name
  @result_type = result_type # TODO Move.
  @categories = categories # for each_delegate. TODO Use a real Index::Categories object.
  @combinator = Query::Combinator.new ignore_unassigned_tokens: ignore_unassigned_tokens
end

#
Expand All @@ -28,6 +25,12 @@ def possible_combinations token
@combinator.possible_combinations_for token
end

# Adds a category to this index type.
#
# name_or_category - either an already configured Configuration::Field
#                    (dup-ed so a shared field can be reused across types)
#                    or a category name from which a new field is built
#                    with the given options.
#
# Returns the result of registering the generated category.
#
def category name_or_category, options = {}
  # Fixed: the new-field branch previously used `name` (this index type's
  # own name) instead of the passed name_or_category.
  category_to_be_added = Configuration::Field === name_or_category ? name_or_category.dup : Configuration::Field.new(name_or_category, options)
  generated_category = category_to_be_added.generate_with self
  # NOTE(review): was bare `combinator.add`, but no reader for @combinator
  # is visible in this file — use the instance variable directly.
  @combinator.add generated_category
end

end

end
2 changes: 2 additions & 0 deletions server/lib/picky/indexers/base.rb
Expand Up @@ -55,7 +55,9 @@ def process
File.open(search_index_file_name, 'w:binary') do |file|
result = []
source.harvest(@type, @field) do |indexed_id, text|
p [indexed_id, text] if [1,2,3,4,5].include?(indexed_id)
tokenizer.tokenize(text).each do |token_text|
p token_text if [1,2,3,4,5].include?(indexed_id)
result << indexed_id << comma << token_text << newline
end
file.write(result.join) && result.clear if result.size > 100_000
Expand Down
15 changes: 12 additions & 3 deletions server/lib/picky/query/combinator.rb
Expand Up @@ -7,17 +7,26 @@ class Combinator
attr_reader :categories, :category_hash
attr_reader :ignore_unassigned_tokens # TODO Should this actually be determined by the query? Probably, yes.

# Combines tokens with categories.
#
# Starts out with no categories; they are registered later via #add.
# (The leftover old constructor header taking `categories` — left
# unterminated above the new definition — has been removed.)
#
# options - :ignore_unassigned_tokens (defaults to false)
#
def initialize options = {}
  @categories = []
  @category_hash = {}

  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
end

# TODO Spec.
#
# Registers a category so it participates in combination lookup:
# appended to the ordered list and indexed by name in the hash.
#
def add category
  categories.push category
  category_hash[category.name] = [category] # TODO An array seems silly.
end

# TODO Move somewhere else.
#
# TODO Or use active_support's?
#
# TODO Remove.
#
def hashify category_array
category_array.inject({}) do |hash, category|
hash[category.name] = [category]
Expand Down
16 changes: 10 additions & 6 deletions server/lib/picky/sources/wrappers/location.rb
Expand Up @@ -6,13 +6,15 @@ class Location < Base

attr_reader :precision, :grid

# TODO Save min and grid!
#
# Wraps a backend source, mapping continuous location values onto a grid.
#
# backend - the wrapped source
# options - :grid (user-visible grid size), :precision
#
def initialize backend, options = {}
  super backend

  @user_grid = extract_user_grid options
  @precision = extract_precision options

  # Internal grid is finer than the user grid; higher precision shrinks it
  # further. (A dead leftover `@grid = @user_grid * 0.666` that was
  # immediately overwritten has been removed.)
  @grid = @user_grid / (@precision + 0.5)
end

#
Expand All @@ -33,8 +35,7 @@ def extract_precision options
end

# Resets the gathered location bounds before a harvest pass.
#
# NOTE(review): @min is assigned twice — the final value is +Infinity, so
# the first (-Infinity) assignment is dead; @max also ends up at +Infinity,
# which means `location > @max` during harvest can never fire even though
# @max is still read by harvest and marginize. This looks like interleaved
# old/new lines from a rewrite — confirm the intended bounds before cleanup.
#
def reset
  @min = -1.0/0
  @max = 1.0/0
  @min = 1.0/0
end

# Yield the data (id, text for id) for the given type and field.
Expand All @@ -49,25 +50,28 @@ def harvest type, field
# Gather min/max.
#
backend.harvest type, field do |indexed_id, location|
location = location.to_f
@min = location if location < @min
@max = location if location > @max
locations << [indexed_id, location]
end

# Add a margin.
#
marginize

# Recalculate locations.
#
locations.each do |indexed_id, location|
locations_for(location).each do |new_location|
yield indexed_id, new_location
yield indexed_id, new_location.to_s
end
end

p [@min, @grid]
end

# Widens the gathered [@min, @max] range by one user-grid step on each
# side, so locations at the edges still land on the grid.
#
def marginize
  @min = @min - @user_grid
  @max = @max + @user_grid
end

# Put location onto multiple places on a grid.
Expand Down
5 changes: 4 additions & 1 deletion server/lib/picky/tokenizers/index.rb
Expand Up @@ -61,8 +61,11 @@ def token_for text
#
# Override in subclasses to redefine behaviour.
#
# TODO Make parametrizable! reject { |token| }
#
# Token rejection hook: filters tokens before indexing.
#
# The default now keeps every token unchanged — the leftover active
# `tokens.reject! { |token| token.to_s.size < 2 }` line has been removed;
# it contradicted the commented-out copy below it and this commit's own
# spec, which requires the single-character token '7' to survive
# tokenization. Override in subclasses to reject tokens.
#
def reject tokens
  tokens
  # tokens.reject! { |token| token.to_s.size < 2 }
end

end
Expand Down
21 changes: 10 additions & 11 deletions server/project_prototype/app/application.rb
Expand Up @@ -24,17 +24,16 @@ class PickySearch < Application

# Define an index. Use a database etc. source? http://github.com/floere/picky/wiki/Sources-Configuration#sources
#
# Define an index over the library CSV. Categories are added one call at a
# time on the index object: Index::Type#category returns the registered
# category, NOT the index, so the calls must not be chained (the leftover
# chained `.category(...)` form would invoke #category on the wrong
# receiver, and leading-dot continuations are not valid 1.8 syntax).
# The leftover old fields-in-constructor definition was also removed.
books_index = index(:books, Sources::CSV.new(:title, :author, :isbn, :year, :publisher, :subjects, file: 'app/library.csv'))
books_index.category(:title,
                     similarity: Similarity::Phonetic.new(3), # Up to three similar title words indexed (default: no similarity).
                     partial: Partial::Substring.new(from: 1)) # Indexes substrings upwards from character 1 (default: -3),
                                                               # so "picky" is found even when entering just "p".
books_index.category(:author,
                     partial: Partial::Substring.new(from: 1))
books_index.category(:isbn,
                     partial: Partial::None.new) # Partial substring search on an ISBN makes little
                                                 # sense, neither does similarity.

query_options = { :weights => { [:title, :author] => +3, [:title] => +1 } } # +/- points for ordered combinations.

Expand Down
20 changes: 18 additions & 2 deletions server/spec/lib/sources/wrappers/location_spec.rb
Expand Up @@ -36,10 +36,26 @@
end
context "with grid and precision option" do
# Leftover old setup (grid:10, precision:3) and its matching expectation
# (== 3) were removed; they contradicted the new setup on the lines
# directly below them and would make the example fail.
before(:each) do
  @wrapper = Sources::Wrappers::Location.new @backend, grid:4, precision:2
end
it "uses the given precision" do
  @wrapper.precision.should == 2
end

describe "locations_for" do
before(:each) do
@wrapper.instance_variable_set :@min, -3
@wrapper.marginize
end
it "returns the right array" do
@wrapper.locations_for(17).should == [13, 14, 15, 16, 17] # TODO Correct?
end
it "returns the right array" do
@wrapper.locations_for(-3).should == [0, 1, 2, 3, 4]
end
it "returns the right array" do
@wrapper.locations_for(20).should == [14, 15, 16, 17, 18]
end
end
end
end
Expand Down
1 change: 1 addition & 0 deletions server/spec/lib/tokenizers/index_spec.rb
Expand Up @@ -68,6 +68,7 @@ def self.it_should_tokenize_token(text, expected)
#
it_should_tokenize_token "splitting on \\s", [:splitting, :on, :"\\s"]
it_should_tokenize_token 'und', [:und]
it_should_tokenize_token '7', [:'7']
end
end

Expand Down
59 changes: 32 additions & 27 deletions server/test_project/app/application.rb
Expand Up @@ -29,33 +29,31 @@ class BookSearch < Application
year = category :year, :qualifiers => [:y, :year, :annee]
isbn = category :isbn, :qualifiers => [:i, :isbn]

# Main index: categories are attached one by one in the reworked API.
# The leftover old-style definition (passing similar_title/author/year
# directly to `index`) was removed — it used the removed
# fields-in-constructor signature and was immediately shadowed anyway.
main_index = index :main, Sources::DB.new('SELECT id, title, author, year FROM books', :file => 'app/db.yml')
main_index.category similar_title
main_index.category author
main_index.category year

# ISBN index. The leftover old-style definition (passing a field to
# `index`) was removed for the same reason as the main index above.
isbn_index = index :isbn, Sources::DB.new("SELECT id, isbn FROM books", :file => 'app/db.yml')
isbn_index.category :isbn, :qualifiers => [:i, :isbn]

# geo_index = index :geo,
# Sources::CSV.new(:location, :north, :east, :file => 'data/locations.csv'),
# category(:location),
# geo_location(:north, grid: 10_000),
# geo_location(:east, grid: 10_000)
# # geo_location(:north, grid: 20_000, :as => :n20k),
# # geo_location(:east, grid: 20_000, :as => :e20k)
geo_index = index :geo, Sources::CSV.new(:location, :north, :east, :file => 'data/locations.csv')
# Fixed: was `geo_index.category :location),` — stray closing paren and
# trailing comma, a syntax error.
geo_index.category :location
# NOTE(review): `source` below is not defined in the visible scope —
# presumably it should reference the geo index's CSV source; confirm.
geo_index.category :north, :source => Sources::Wrappers::Location.new(source, grid:2), :tokenizer => Tokenizers::Index.new
geo_index.category :east, :source => Sources::Wrappers::Location.new(source, grid:2), :tokenizer => Tokenizers::Index.new
# geo_index.location :north, grid: 2 # TODO partial does not make sense!
# geo_index.location :east, grid: 2
# geo_location(:north, grid: 20_000, :as => :n20k),
# geo_location(:east, grid: 20_000, :as => :e20k)

# CSV test index. A leftover second old-style assignment of csv_test_index
# (passing fields and category(...) calls to `index`) was removed: it used
# the removed fields-in-constructor API and overwrote this definition.
csv_test_index = index :csv_test, Sources::CSV.new(:title,:author,:isbn,:year,:publisher,:subjects, :file => 'data/books.csv')
csv_test_index.category similar_title
csv_test_index.category author
csv_test_index.category isbn
csv_test_index.category year
csv_test_index.category :publisher, :qualifiers => [:p, :publisher]
csv_test_index.category :subjects, :qualifiers => [:s, :subject]


options = {
:weights => {
Expand All @@ -71,16 +69,23 @@ class BookSearch < Application
full_csv = Query::Full.new csv_test_index, options
live_csv = Query::Live.new csv_test_index, options

# NOTE(review): full_isbn/live_isbn were each assigned twice (leftover old
# assignments passing `options`); only the surviving, options-less pair is
# kept — confirm dropping `options` for the ISBN queries was intended.
full_isbn = Query::Full.new isbn_index
live_isbn = Query::Live.new isbn_index

full_geo = Query::Full.new geo_index
live_geo = Query::Live.new geo_index

# URL routing. Fixed: %r{^/geo/live} appeared twice as a hash key (the
# second mapping silently overwrote the first), and the /isbn/live route
# was missing although live_isbn is defined above.
route %r{^/books/full} => full_main,
      %r{^/books/live} => live_main,

      %r{^/csv/full} => full_csv,
      %r{^/csv/live} => live_csv,

      %r{^/isbn/full} => full_isbn,
      %r{^/isbn/live} => live_isbn,

      %r{^/geo/full} => full_geo,
      %r{^/geo/live} => live_geo,

      %r{^/all/full} => Query::Full.new(main_index, csv_test_index, isbn_index, geo_index, options),
      %r{^/all/live} => Query::Live.new(main_index, csv_test_index, isbn_index, geo_index, options)

Expand Down

0 comments on commit 4ecc6e9

Please sign in to comment.