Skip to content

Commit

Permalink
Edit distance = 1 for misspellings
Browse files Browse the repository at this point in the history
  • Loading branch information
ankane committed Aug 1, 2013
1 parent 5b454a9 commit a73fbe4
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 11 deletions.
10 changes: 7 additions & 3 deletions lib/searchkick/reindex.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ def searchkick_index_options
tokenizer: "standard",
# synonym should come last, after stemming and shingle
# shingle must come before snowball
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_index_shingle"]
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_index_shingle", "snowball"]
},
searchkick_search: {
type: "custom",
tokenizer: "standard",
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_search_shingle"]
filter: ["standard", "lowercase", "asciifolding", "stop", "snowball", "searchkick_search_shingle", "snowball"]
},
searchkick_search2: {
type: "custom",
Expand Down Expand Up @@ -91,8 +91,12 @@ def searchkick_index_options
ignore_case: true,
synonyms: synonyms.select{|s| s.size > 1 }.map{|s| "#{s[0..-2].join(",")} => #{s[-1]}" }
}
# choosing a place for the synonym filter when stemming is not easy
# https://groups.google.com/forum/#!topic/elasticsearch/p7qcQlgHdB8
# TODO use a snowball stemmer on synonyms when creating the token filter
settings[:analysis][:analyzer][:default_index][:filter].insert(-4, "searchkick_synonym")
settings[:analysis][:analyzer][:default_index][:filter] << "searchkick_synonym"
settings[:analysis][:analyzer][:searchkick_search][:filter].insert(-2, "searchkick_synonym")
settings[:analysis][:analyzer][:searchkick_search][:filter].insert(-4, "searchkick_synonym")
settings[:analysis][:analyzer][:searchkick_search][:filter] << "searchkick_synonym"
settings[:analysis][:analyzer][:searchkick_search2][:filter] << "searchkick_synonym"
end
Expand Down
4 changes: 2 additions & 2 deletions lib/searchkick/search.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ def search(term, options = {})
match fields, term, boost: 10, operator: operator, analyzer: "searchkick_search2"
end
query do
match fields, term, use_dis_max: false, fuzziness: 0.7, max_expansions: 1, prefix_length: 1, operator: operator, analyzer: "searchkick_search"
match fields, term, use_dis_max: false, fuzziness: 1, max_expansions: 1, operator: operator, analyzer: "searchkick_search"
end
query do
match fields, term, use_dis_max: false, fuzziness: 0.7, max_expansions: 1, prefix_length: 1, operator: operator, analyzer: "searchkick_search2"
match fields, term, use_dis_max: false, fuzziness: 1, max_expansions: 1, operator: operator, analyzer: "searchkick_search2"
end
end
end
Expand Down
30 changes: 24 additions & 6 deletions test/searchkick_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

class Product < ActiveRecord::Base
searchkick \
settings: {
number_of_shards: 1
},
synonyms: [
["clorox", "bleach"],
["scallion", "greenonion"],
["saranwrap", "plasticwrap"],
["qtip", "cotton swab"],
["burger", "hamburger"],
["bandaid", "bandag"]
],
settings: {
number_of_shards: 1
}
]

attr_accessor :conversions

Expand Down Expand Up @@ -82,6 +82,24 @@ def test_misspelling_sriracha
assert_search "siracha", ["Sriracha"]
end

# A query that is one letter short of a stored four-letter name ("fin" for
# "Finn") is expected to still return that name.
def test_short_word
  store_names(["Finn"])
  query = "fin"
  assert_search(query, ["Finn"])
end

# Pins the misspelling tolerance: a query one edit away from a stored name is
# expected to match, and a query two edits away is expected to return nothing.
def test_edit_distance
  store_names(["Bingo"])

  # [query, expected names], asserted in this order.
  # Edit counts are relative to "bingo", ignoring case.
  short_word_cases = [
    ["bin",     []],        # two letters missing
    ["bing",    ["Bingo"]], # one letter missing
    ["bingoo",  ["Bingo"]], # one extra letter
    ["bingooo", []],        # two extra letters
    ["ringo",   ["Bingo"]], # one letter substituted
    ["mango",   []]         # two letters substituted
  ]
  short_word_cases.each do |query, expected|
    assert_search(query, expected)
  end

  # The same one-edit limit is expected to hold for a much longer word.
  store_names(["thisisareallylongword"])
  assert_search("thisisareallylongwor", ["thisisareallylongword"]) # missing letter
  assert_search("thisisareelylongword", []) # edit distance = 2
end

def test_misspelling_tabasco
store_names ["Tabasco"]
assert_search "tobasco", ["Tabasco"]
Expand Down Expand Up @@ -124,8 +142,8 @@ def test_spaces_in_field
end

def test_spaces_in_query
store_names ["Dishwasher Soap"]
assert_search "dish washer", ["Dishwasher Soap"]
store_names ["Dishwasher"]
assert_search "dish washer", ["Dishwasher"]
end

def test_spaces_three_words
Expand Down

0 comments on commit a73fbe4

Please sign in to comment.