Skip to content

Commit

Permalink
FIX: Don't inject extra terms for version lexeme.
Browse files Browse the repository at this point in the history
  • Loading branch information
tgxworld committed Jul 27, 2020
1 parent ce53180 commit b70f108
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 7 deletions.
16 changes: 9 additions & 7 deletions app/services/search_indexer.rb
Expand Up @@ -59,13 +59,15 @@ def self.update_index(table: , id: , raw_data:)
tsvector.scan(/'(([a-zA-Z0-9]+\.)+[a-zA-Z0-9]+)'\:([\w+,]+)/).reduce(additional_lexemes) do |array, (lexeme, _, positions)|
count = 0

loop do
count += 1
break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots
term, _, remaining = lexeme.partition(".")
break if remaining.blank?
array << "'#{term}':#{positions} '#{remaining}':#{positions}"
lexeme = remaining
if lexeme !~ /^(\d+\.)?(\d+\.)?(\*|\d+)$/
loop do
count += 1
break if count >= 10 # Safeguard here to prevent infinite loop when a term has many dots
term, _, remaining = lexeme.partition(".")
break if remaining.blank?
array << "'#{term}':#{positions} '#{remaining}':#{positions}"
lexeme = remaining
end
end

array
Expand Down
9 changes: 9 additions & 0 deletions spec/services/search_indexer_spec.rb
Expand Up @@ -141,6 +141,15 @@ def scrub(html, strip_diacritics: false)
)
end

it 'should not tokenize versions' do
  # A dotted version string in the post body is expected to appear in the
  # search data as one lexeme, with no extra terms for its dot-separated parts.
  post.topic.update!(title: "this is a title that I am testing")
  post.update!(raw: '1.2.2')

  indexed_search_data = post.post_search_data.search_data

  expect(indexed_search_data).to eq("'1.2.2':10 'test':8A 'titl':4A 'uncategor':9B")
end

it 'should tokenize host of a URL and removes query string' do
category = Fabricate(:category, name: 'awesome category')
topic = Fabricate(:topic, category: category, title: 'this is a test topic')
Expand Down

0 comments on commit b70f108

Please sign in to comment.