diff --git a/lib/search.rb b/lib/search.rb index d89e231560f39..d0e296a93068a 100644 --- a/lib/search.rb +++ b/lib/search.rb @@ -838,13 +838,14 @@ def posts_query(limit, opts = nil) posts = posts.order("posts.like_count DESC") end else - # 0|32 default normalization scaled into the range zero to one + # 2|32 divides the rank by the document length and scales the range from + # zero to one data_ranking = <<~SQL ( TS_RANK_CD( post_search_data.search_data, #{ts_query(weight_filter: weights)}, - 0|32 + 2|32 ) * ( CASE categories.search_priority diff --git a/spec/components/search_spec.rb b/spec/components/search_spec.rb index 18dd5d9022e7a..b759b815f72cb 100644 --- a/spec/components/search_spec.rb +++ b/spec/components/search_spec.rb @@ -334,6 +334,27 @@ def new_post(raw, topic = nil) expect(result.posts).to contain_exactly(reply) expect(result.blurb(reply)).to eq(expected_blurb) end + + it 'does not allow a post with repeated words to dominate the ranking' do + category = Fabricate(:category, name: "winter is coming") + + post = Fabricate(:post, + raw: "I think winter will end soon", + topic: Fabricate(:topic, + title: "dragon john snow winter", + category: category + ) + ) + + post2 = Fabricate(:post, + raw: "I think winter winter winter winter winter will end soon", + topic: Fabricate(:topic, title: "dragon john snow summer", category: category) + ) + + result = Search.execute('winter') + + expect(result.posts).to eq([post, post2, category.topic.first_post]) + end end context 'searching for quoted title' do @@ -940,22 +961,45 @@ def search today = Date.today yesterday = 1.day.ago two_days_ago = 2.days.ago + category = Fabricate(:category) + + old_topic = Fabricate(:topic, + title: 'First Topic, testing the created_at sort', + created_at: two_days_ago, + category: category + ) - old_topic = Fabricate(:topic, - title: 'First Topic, testing the created_at sort', - created_at: two_days_ago) latest_topic = Fabricate(:topic, - title: 'Second Topic, testing the created_at sort', - created_at: yesterday) + title: 'Second Topic, testing the created_at sort', + created_at: yesterday, + category: category + ) + + old_relevant_topic_post = Fabricate(:post, + topic: old_topic, + created_at: yesterday, + raw: 'Relevant Relevant Topic' + ) - old_relevant_topic_post = Fabricate(:post, topic: old_topic, created_at: yesterday, raw: 'Relevant Topic') - latest_irelevant_topic_post = Fabricate(:post, topic: latest_topic, created_at: today, raw: 'Not Relevant') + latest_irelevant_topic_post = Fabricate(:post, + topic: latest_topic, + created_at: today, + raw: 'Not Relevant' + ) # Expecting the default results - expect(Search.execute('Topic').posts.map(&:id)).to eq([old_relevant_topic_post.id, latest_irelevant_topic_post.id]) + expect(Search.execute('Topic').posts).to contain_exactly( + old_relevant_topic_post, + latest_irelevant_topic_post, + category.topic.first_post + ) # Expecting the ordered by topic creation results - expect(Search.execute('Topic order:latest_topic').posts.map(&:id)).to eq([latest_irelevant_topic_post.id, old_relevant_topic_post.id]) + expect(Search.execute('Topic order:latest_topic').posts).to contain_exactly( + latest_irelevant_topic_post, + old_relevant_topic_post, + category.topic.first_post + ) end it 'can tokenize dots' do