Skip to content
This repository
Browse code

+ expose ignore_unassigned_tokens to the user

  • Loading branch information...
commit 28908cc28caced9eda29fed4ada53f93aef13596 1 parent 41f2ca6
Florian R. Hanke authored September 04, 2011
15  server/lib/picky/categories.rb
@@ -15,23 +15,8 @@ class Categories
15 15
 
16 16
     # A list of indexed categories.
17 17
     #
18  
-    # Options:
19  
-    #  * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
20  
-    #                              The default behaviour is that if a token does not match to
21  
-    #                              any category, the query will not return anything (since a
22  
-    #                              single token cannot be matched). If you set this option to
23  
-    #                              true, any token that cannot be matched to a category will be
24  
-    #                              simply ignored.
25  
-    #                              Use this if only a few matched words are important, like for
26  
-    #                              example of the query "Jonathan Myers 86455 Las Cucarachas"
27  
-    #                              you only want to match the zipcode, to have the search engine
28  
-    #                              display advertisements on the side for the zipcode.
29  
-    #                              Nifty! :)
30  
-    #
31 18
     def initialize options = {}
32 19
       clear_categories
33  
-
34  
-      @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
35 20
     end
36 21
 
37 22
     # Clears both the array of categories and the hash of categories.
9  server/lib/picky/categories_indexed.rb
@@ -2,8 +2,6 @@ module Picky
2 2
 
3 3
   class Categories
4 4
 
5  
-    attr_reader :ignore_unassigned_tokens
6  
-
7 5
     each_delegate :load_from_cache,
8 6
                   :analyze,
9 7
                   :to => :categories
@@ -56,20 +54,13 @@ def inject_possible_for tokens
56 54
     # Returns possible Combinations for the token.
57 55
     #
58 56
     # Note: The preselected_categories param is an optimization.
59  
-    #
60 57
     # Note: Returns [] if no categories matched (will produce no result).
61  
-    #       Returns nil if this token needs to be removed from the query.
62  
-    #       (Also none of the categories matched, but the ignore unassigned
63  
-    #       tokens option is true)
64 58
     #
65 59
     def possible_for token, preselected_categories = nil
66 60
       possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
67 61
         combination = category.combination_for token
68 62
         combination ? combinations << combination : combinations
69 63
       end
70  
-      # This is an optimization to mark tokens that are ignored.
71  
-      #
72  
-      return if ignore_unassigned_tokens && possible.empty?
73 64
       possible
74 65
     end
75 66
 
11  server/lib/picky/index.rb
@@ -98,7 +98,7 @@ class Index
98 98
     # === Parameters
99 99
     # * name: A name that will be used for the index directory and in the Picky front end.
100 100
     #
101  
-    # === Options (all are used in the block, see examples)
  101
+    # === Options (all are used in the block - not passed as a Hash, see examples)
102 102
     # * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
103 103
     # * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
104 104
     # * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
@@ -113,12 +113,9 @@ class Index
113 113
     #     result_identifier :my_special_results
114 114
     #   end
115 115
     #
116  
-    def initialize name, options = {}
117  
-      @name              = name.to_sym
118  
-
119  
-      # TODO Move ignore_unassigned_tokens to query, somehow. Then, remove options.
120  
-      #
121  
-      @categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
  116
+    def initialize name
  117
+      @name       = name.to_sym
  118
+      @categories = Categories.new
122 119
 
123 120
       # Centralized registry.
124 121
       #
19  server/lib/picky/query/tokens.rb
@@ -8,20 +8,23 @@ module Query
8 8
     #
9 9
     class Tokens # :nodoc:all
10 10
 
  11
+      attr_reader :ignore_unassigned
  12
+
11 13
       # Basically delegates to its internal tokens array.
12 14
       #
13 15
       self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
14 16
 
15 17
       # Create a new Tokens object with the array of tokens passed in.
16 18
       #
17  
-      def initialize tokens = []
18  
-        @tokens = tokens
  19
+      def initialize tokens, ignore_unassigned = false
  20
+        @tokens            = tokens
  21
+        @ignore_unassigned = ignore_unassigned
19 22
       end
20 23
 
21 24
       # Creates a new Tokens object from a number of Strings.
22 25
       #
23  
-      def self.processed words, originals
24  
-        new words.zip(originals).collect! { |word, original| Token.processed word, original }
  26
+      def self.processed words, originals, ignore_unassigned = false
  27
+        new words.zip(originals).collect! { |word, original| Token.processed word, original }, ignore_unassigned
25 28
       end
26 29
 
27 30
       # Tokenizes each token.
@@ -45,14 +48,16 @@ def possible_combinations_in index
45 48
         @tokens.inject([]) do |combinations, token|
46 49
           possible_combinations = token.possible_combinations_in index
47 50
 
48  
-          # TODO Could move the ignore_unassigned_tokens here!
49  
-          #
50 51
           # Note: Optimization for ignoring tokens that allocate to nothing and
51 52
           # can be ignored.
52 53
           # For example in a special search, where "florian" is not
53 54
           # mapped to any category.
54 55
           #
55  
-          possible_combinations ? combinations << possible_combinations : combinations
  56
+          if ignore_unassigned && possible_combinations.empty?
  57
+            combinations
  58
+          else
  59
+            combinations << possible_combinations
  60
+          end
56 61
         end
57 62
       end
58 63
 
30  server/lib/picky/search.rb
@@ -44,6 +44,7 @@ def initialize *index_definitions
44 44
 
45 45
       @tokenizer ||= Tokenizer.query_default # THINK Not dynamic. Ok?
46 46
       @weights   ||= Query::Weights.new
  47
+      @ignore_unassigned = false if @ignore_unassigned.nil?
47 48
 
48 49
       self
49 50
     end
@@ -99,6 +100,33 @@ def boost weights
99 100
       end
100 101
     end
101 102
 
  103
+    # Ignore the given token if it cannot be matched to a category.
  104
+    # The default behaviour is that if a token does not match
  105
+    # any category, the query will not return anything (since a
  106
+    # single token cannot be matched). If you set this option to
  107
+    # true, any token that cannot be matched to a category will be
  108
+    # simply ignored.
  109
+    #
  110
+    # Use this if only a few matched words are important. For
  111
+    # example, in the query "Jonathan Myers 86455 Las Cucarachas"
  112
+    # you only want to match the zipcode, to have the search engine
  113
+    # display advertisements on the side for the zipcode.
  114
+    #
  115
+    # False by default.
  116
+    #
  117
+    # Example:
  118
+    #   search = Search.new(books_index, dvd_index, mp3_index) do
  119
+    #     ignore_unassigned_tokens true
  120
+    #   end
  121
+    #
  122
+    # With this set to true, if in "Peter Flunder", "Flunder"
  123
+    # couldn't be assigned to any category, it will simply be
  124
+    # ignored. This is done for each categorization.
  125
+    #
  126
+    def ignore_unassigned_tokens value
  127
+      @ignore_unassigned = value
  128
+    end
  129
+
102 130
     # This is the main entry point for a query.
103 131
     # Use this in specs and also for running queries.
104 132
     #
@@ -146,7 +174,7 @@ def execute tokens, ids, offset, original_text = nil
146 174
     #
147 175
     def tokenized text
148 176
       tokens, originals = tokenizer.tokenize text
149  
-      tokens = Query::Tokens.processed tokens, originals || tokens
  177
+      tokens = Query::Tokens.processed tokens, originals || tokens, @ignore_unassigned
150 178
       tokens.partialize_last # Note: In the standard Picky search, the last token is always partial.
151 179
       tokens
152 180
     end
18  server/spec/lib/categories_indexed_spec.rb
... ...
@@ -1,24 +1,6 @@
1 1
 require 'spec_helper'
2 2
 
3 3
 describe Picky::Categories do
4  
-  context 'with option ignore_unassigned_tokens' do
5  
-    context 'ignore_unassigned_tokens true' do
6  
-      before(:each) do
7  
-        @categories = described_class.new ignore_unassigned_tokens: true
8  
-      end
9  
-      it 'should return the right value' do
10  
-        @categories.ignore_unassigned_tokens.should == true
11  
-      end
12  
-    end
13  
-    context 'ignore_unassigned_tokens false' do
14  
-      before(:each) do
15  
-        @categories = described_class.new ignore_unassigned_tokens: false
16  
-      end
17  
-      it 'should return the right value' do
18  
-        @categories.ignore_unassigned_tokens.should == false
19  
-      end
20  
-    end
21  
-  end
22 4
   
23 5
   context "with real categories" do
24 6
     before(:each) do
52  server/spec/lib/query/tokens_spec.rb
@@ -2,6 +2,53 @@
2 2
 
3 3
 describe Picky::Query::Tokens do
4 4
   
  5
+  context 'with ignore_unassigned_tokens true' do
  6
+    it 'generates processed tokens from all words' do
  7
+      expected = [
  8
+        Picky::Query::Token.processed('this~'),
  9
+        Picky::Query::Token.processed('is'),
  10
+        Picky::Query::Token.processed('a'),
  11
+        Picky::Query::Token.processed('sp:solr'),
  12
+        Picky::Query::Token.processed('query"')
  13
+      ]
  14
+      
  15
+      described_class.should_receive(:new).once.with expected, true
  16
+      
  17
+      described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], [], true
  18
+    end
  19
+    
  20
+    describe 'possible_combinations_in' do
  21
+      before(:each) do
  22
+        @token1 = stub :token1
  23
+        @token2 = stub :token2
  24
+        @token3 = stub :token3
  25
+
  26
+        @tokens = described_class.new [@token1, @token2, @token3], true
  27
+      end
  28
+      it 'should work correctly' do
  29
+        @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
  30
+        @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination21]
  31
+        @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
  32
+
  33
+        @tokens.possible_combinations_in(:some_index).should == [
  34
+          [:combination11, :combination12],
  35
+          [:combination21],
  36
+          [:combination31, :combination32, :combination33]
  37
+        ]
  38
+      end
  39
+      it 'should work correctly' do
  40
+        @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
  41
+        @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return []
  42
+        @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
  43
+
  44
+        @tokens.possible_combinations_in(:some_index).should == [
  45
+          [:combination11, :combination12],
  46
+          [:combination31, :combination32, :combination33]
  47
+        ]
  48
+      end
  49
+    end
  50
+  end
  51
+  
5 52
   describe '.processed' do
6 53
     it 'generates processed tokens from all words' do
7 54
       expected = [
@@ -12,7 +59,7 @@
12 59
         Picky::Query::Token.processed('query"')
13 60
       ]
14 61
       
15  
-      described_class.should_receive(:new).once.with expected
  62
+      described_class.should_receive(:new).once.with expected, false
16 63
       
17 64
       described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
18 65
     end
@@ -25,7 +72,7 @@
25 72
         Picky::Query::Token.processed('query"')
26 73
       ]
27 74
       
28  
-      described_class.should_receive(:new).once.with expected
  75
+      described_class.should_receive(:new).once.with expected, false
29 76
       
30 77
       described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
31 78
     end
@@ -114,6 +161,7 @@
114 161
 
115 162
       @tokens.possible_combinations_in(:some_index).should == [
116 163
         [:combination11, :combination12],
  164
+        nil,
117 165
         [:combination31, :combination32, :combination33]
118 166
       ]
119 167
     end
4  server/test_project/app/application.rb
@@ -252,6 +252,10 @@ def initialize isbn
252 252
     route %r{\A/admin\Z}           => Picky::LiveParameters.new
253 253
 
254 254
     route %r{\A/books\Z}           => (Picky::Search.new(books_index, isbn_index) do boost weights end),
  255
+          %r{\A/books_ignoring\Z}  => (Picky::Search.new(books_index, isbn_index) do
  256
+                                         boost weights
  257
+                                         ignore_unassigned_tokens true
  258
+                                      end),
255 259
           %r{\A/book_each\Z}       => (Picky::Search.new(book_each_index) do
256 260
                                          boost weights
257 261
                                          ignore :title
6  server/test_project/spec/integration_spec.rb
@@ -11,6 +11,7 @@
11 11
   end
12 12
 
13 13
   let(:books)           { Picky::TestClient.new(described_class, :path => '/books')           }
  14
+  let(:books_ignoring)  { Picky::TestClient.new(described_class, :path => '/books_ignoring')  }
14 15
   let(:book_each)       { Picky::TestClient.new(described_class, :path => '/book_each')       }
15 16
   let(:csv)             { Picky::TestClient.new(described_class, :path => '/csv')             }
16 17
   let(:redis)           { Picky::TestClient.new(described_class, :path => '/redis')           }
@@ -119,6 +120,11 @@
119 120
   it { csv.search('soledad human').ids.should == [72] }
120 121
   it { csv.search('first three minutes weinberg').ids.should == [1] }
121 122
 
  123
+  # Standard tests with ignoring unassigned.
  124
+  #
  125
+  it { books_ignoring.search('soledad human quack').ids.should == [72] }
  126
+  it { books_ignoring.search('first quack three quack minutes weinberg quack').ids.should == [1] }
  127
+
122 128
   # "Symbol" keys.
123 129
   #
124 130
   it { sym.search('key').ids.should == ['a', 'b', 'c', 'd', 'e', 'f'] }
13  server/test_project_sinatra/app.rb
@@ -39,7 +39,7 @@ class BookSearch < Sinatra::Application
39 39
             case_sensitive:              true,
40 40
             maximum_tokens:              5
41 41
 
42  
-  books_index = Index.new :books, result_identifier: 'boooookies' do
  42
+  books_index = Index.new :books do
43 43
     source   Sources::DB.new('SELECT id, title, author, year FROM books', file: 'db.yml')
44 44
     category :id
45 45
     category :title,
@@ -48,6 +48,8 @@ class BookSearch < Sinatra::Application
48 48
              similarity: Similarity::DoubleMetaphone.new(2)
49 49
     category :author, partial: Partial::Substring.new(:from => -2)
50 50
     category :year, qualifiers: [:y, :year, :annee]
  51
+
  52
+    result_identifier 'boooookies'
51 53
   end
52 54
 
53 55
   class Book < ActiveRecord::Base; end
@@ -258,13 +260,20 @@ def initialize isbn
258 260
     [:author, :year]  => +2
259 261
   }
260 262
 
261  
-  # This looks horrible – but usually you have it only once.
  263
+  # This looks horrible – but usually you have it only once or twice.
262 264
   # It's flexible.
263 265
   #
264 266
   books_search = Search.new books_index, isbn_index do boost weights end
265 267
   get %r{\A/books\Z} do
266 268
     books_search.search(params[:query], params[:ids] || 20, params[:offset] || 0).to_json
267 269
   end
  270
+  books_ignoring_search = Search.new books_index, isbn_index do
  271
+                             boost weights
  272
+                             ignore_unassigned_tokens true
  273
+                          end
  274
+  get %r{\A/books_ignoring\Z} do
  275
+    books_ignoring_search.search(params[:query], params[:ids] || 20, params[:offset] || 0).to_json
  276
+  end
268 277
   book_each_search = Search.new book_each_index do
269 278
                        boost weights
270 279
                        ignore :title
6  server/test_project_sinatra/spec/integration_spec.rb
@@ -11,6 +11,7 @@
11 11
   end
12 12
 
13 13
   let(:books)           { Picky::TestClient.new(described_class, :path => '/books')           }
  14
+  let(:books_ignoring)  { Picky::TestClient.new(described_class, :path => '/books_ignoring')  }
14 15
   let(:book_each)       { Picky::TestClient.new(described_class, :path => '/book_each')       }
15 16
   let(:csv)             { Picky::TestClient.new(described_class, :path => '/csv')             }
16 17
   let(:redis)           { Picky::TestClient.new(described_class, :path => '/redis')           }
@@ -119,6 +120,11 @@
119 120
   it { csv.search('soledad human').ids.should == [72] }
120 121
   it { csv.search('first three minutes weinberg').ids.should == [1] }
121 122
 
  123
+  # Standard tests with ignoring unassigned.
  124
+  #
  125
+  it { books_ignoring.search('soledad human quack').ids.should == [72] }
  126
+  it { books_ignoring.search('first quack three quack minutes weinberg quack').ids.should == [1] }
  127
+
122 128
   # "Symbol" keys.
123 129
   #
124 130
   it { sym.search('key').ids.should == ['a', 'b', 'c', 'd', 'e', 'f'] }

0 notes on commit 28908cc

Please sign in to comment.
Something went wrong with that request. Please try again.