Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

FuzzySearch uses the Elasticsearch Multi Search (msearch) API

Reduces HTTP requests from 5 to 1, so searches complete much faster.
  • Loading branch information...
commit 7ac6080372c553d68ef236d7789ffd28c3f3eafc 1 parent fdcd5af
@saulius saulius authored
View
4 app/search_indexes/chapter_index.rb
@@ -1,4 +1,8 @@
class ChapterIndex < SearchIndex
+ def goods_nomenclature? # overrides the SearchIndex default (false); FuzzySearchResult#each_query only queries indexes answering true
+ true
+ end
+
def definition
{
mappings: {
View
4 app/search_indexes/commodity_index.rb
@@ -1,4 +1,8 @@
class CommodityIndex < SearchIndex
+ def goods_nomenclature? # overrides the SearchIndex default (false); FuzzySearchResult#each_query only queries indexes answering true
+ true
+ end
+
def definition
{
mappings: {
View
4 app/search_indexes/heading_index.rb
@@ -1,4 +1,8 @@
class HeadingIndex < SearchIndex
+ def goods_nomenclature? # overrides the SearchIndex default (false); FuzzySearchResult#each_query only queries indexes answering true
+ true
+ end
+
def definition
{
mappings: {
View
4 app/search_indexes/search_index.rb
@@ -14,4 +14,8 @@ def type
def model
self.class.name.chomp("Index").constantize
end
+
+ def goods_nomenclature? # default answer; subclasses such as ChapterIndex, HeadingIndex, CommodityIndex and SectionIndex override it to return true
+ false
+ end
end
View
4 app/search_indexes/section_index.rb
@@ -1,4 +1,8 @@
class SectionIndex < SearchIndex
+ def goods_nomenclature? # overrides the SearchIndex default (false); FuzzySearchResult#each_query only queries indexes answering true
+ true
+ end
+
def definition
{
mappings: {
View
16 app/services/search_service/fuzzy_search.rb
@@ -1,13 +1,19 @@
class SearchService
class FuzzySearch < BaseSearch
- autoload :FuzzyMatch, 'search_service/fuzzy_search/fuzzy_match'
- autoload :GoodsNomenclatureMatch, 'search_service/fuzzy_search/goods_nomenclature_match'
- autoload :ReferenceMatch, 'search_service/fuzzy_search/reference_match'
+ autoload :FuzzyQuery, 'search_service/fuzzy_search/fuzzy_query'
+ autoload :FuzzySearchResult, 'search_service/fuzzy_search/fuzzy_search_result'
+ autoload :GoodsNomenclatureQuery, 'search_service/fuzzy_search/goods_nomenclature_query'
+ autoload :ReferenceQuery, 'search_service/fuzzy_search/reference_query'
def search!
begin
- @results = { goods_nomenclature_match: GoodsNomenclatureMatch.for(query_string, date),
- reference_match: ReferenceMatch.for(query_string, date) }
+ @results ||= begin
+ FuzzySearchResult.new(query_string, date).each_with_object({}) do |(match_type, search_index, results), memo|
+ memo.deep_merge!({
+ match_type => { search_index.type.pluralize => results }
+ })
+ end
+ end
rescue Elasticsearch::Transport::Transport::Error
# rescue from malformed queries, return empty resultset in that case
BLANK_RESULT
View
17 app/services/search_service/fuzzy_search/fuzzy_match.rb
@@ -1,17 +0,0 @@
-class SearchService
- class FuzzySearch < BaseSearch
- class FuzzyMatch
- attr_reader :query_string,
- :date
-
- def self.for(*args)
- new(*args).serializable_hash
- end
-
- def initialize(query_string, date)
- @query_string = query_string
- @date = date
- end
- end
- end
-end
View
19 app/services/search_service/fuzzy_search/fuzzy_query.rb
@@ -0,0 +1,19 @@
+class SearchService
+ class FuzzySearch < BaseSearch
+ class FuzzyQuery
+ attr_reader :query_string,
+ :date,
+ :index
+
+ def initialize(query_string, date, index)
+ @query_string = query_string
+ @date = date
+ @index = index
+ end
+
+ def match_type
+ :"#{self.class.name.demodulize.chomp("Query").underscore}_match"
+ end
+ end
+ end
+end
View
53 app/services/search_service/fuzzy_search/fuzzy_search_result.rb
@@ -0,0 +1,53 @@
+class SearchService
+ class FuzzySearch < BaseSearch
+ class FuzzySearchResult
+ include Enumerable
+
+ QUERY_OPTIONS = {
+ goods_nomenclature_match: {
+ 'tariff-sections' => { fields: ["title"] }
+ }
+ }
+
+ def initialize(query_string, date)
+ @query_string = query_string
+ @date = date
+ end
+
+ # Elasticsearch multisearch endpoint returns results in the same
+ # order as the queries were passed so we rely on #each_query to
+ # yield the queries for search and result output
+ #
+ # More here http://www.elasticsearch.org/guide/en/elasticsearch/reference/current/search-multi-search.html
+ def each
+ return to_enum(:each) unless block_given?
+ each_query.each_with_index do |(match_type, search_index, _), idx|
+ yield match_type, search_index, search_results[idx].hits.hits
+ end
+ end
+
+ def search_results
+ @search_results ||= TradeTariffBackend.search_client.msearch(
+ body: each_query.map { |_, _, query| query }
+ ).responses
+ end
+
+ def each_query(&block)
+ return to_enum(:each_query) unless block_given?
+
+ TradeTariffBackend.search_indexes.select(&:goods_nomenclature?).each do |search_index|
+ [
+ GoodsNomenclatureQuery.new(@query_string, @date, search_index),
+ ReferenceQuery.new(@query_string, @date, search_index)
+ ].each do |search_query|
+ yield search_query.match_type,
+ search_query.index,
+ search_query.query(
+ QUERY_OPTIONS.fetch(search_query.match_type, {}).fetch(search_query.index.name, {})
+ )
+ end
+ end
+ end
+ end
+ end
+end
View
74 app/services/search_service/fuzzy_search/goods_nomenclature_match.rb
@@ -1,74 +0,0 @@
-class SearchService
- class FuzzySearch < BaseSearch
- class GoodsNomenclatureMatch < FuzzyMatch
- def serializable_hash
- {
- sections: search_results_for('tariff-sections', query_string: { fields: ["title"] }),
- chapters: search_results_for('tariff-chapters'),
- headings: search_results_for('tariff-headings'),
- commodities: search_results_for('tariff-commodities')
- }
- end
-
- private
-
- def search_results_for(index, query_opts = {})
- TradeTariffBackend.search_client.search(index: index,
- body:
- { query: {
- constant_score: {
- filter: {
- and: [
- {
- # match the search phrase
- query: {
- query_string: {
- query: query_string,
- fields: ["description"]
- }.merge(query_opts)
- }
- },
- {
- or: [
- # actual date is either between item's (validity_start_date..validity_end_date)
- {
- and: [
- range: {
- validity_start_date: { lte: date }
- },
- range: {
- validity_end_date: { gte: date }
- }
- ]
- },
- # or is greater than item's validity_start_date
- # and item has blank validity_end_date (is unbounded)
- {
- and: [
- {
- range: {
- validity_start_date: { lte: date }
- }
- },
- {
- missing: {
- field: "validity_end_date",
- null_value: true,
- existence: true
- }
- }
- ]
- }
- ]
- }
- ]
- }
- }
- },
- size: INDEX_SIZE_MAX
- }
- ).hits.hits
- end
- end
- end
-end
View
65 app/services/search_service/fuzzy_search/goods_nomenclature_query.rb
@@ -0,0 +1,65 @@
+class SearchService
+ class FuzzySearch < BaseSearch
+ class GoodsNomenclatureQuery < FuzzyQuery
+ def query(query_opts = {})
+ {
+ index: index.name,
+ type: index.type,
+ search: {
+ query: {
+ constant_score: {
+ filter: {
+ and: [
+ {
+ # match the search phrase
+ query: {
+ query_string: {
+ query: query_string,
+ fields: ["description"]
+ }.merge(query_opts)
+ }
+ },
+ {
+ or: [
+ # actual date is either between item's (validity_start_date..validity_end_date)
+ {
+ and: [
+ range: {
+ validity_start_date: { lte: date }
+ },
+ range: {
+ validity_end_date: { gte: date }
+ }
+ ]
+ },
+ # or is greater than item's validity_start_date
+ # and item has blank validity_end_date (is unbounded)
+ {
+ and: [
+ {
+ range: {
+ validity_start_date: { lte: date }
+ }
+ },
+ {
+ missing: {
+ field: "validity_end_date",
+ null_value: true,
+ existence: true
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+ }
+ }
+ },
+ size: INDEX_SIZE_MAX
+ }
+ }
+ end
+ end
+ end
+end
View
127 app/services/search_service/fuzzy_search/reference_match.rb
@@ -1,127 +0,0 @@
-class SearchService
- class FuzzySearch < BaseSearch
- class ReferenceMatch < FuzzyMatch
- autoload :ResultQuery, 'search_service/fuzzy_search/reference_match/result_query'
-
- def sections
- @sections ||= ResultQuery.new(search_results).for('Section')
- .uniq_by('id')
- .sort_by('position')
- .all
- end
-
- def chapters
- @chapters ||= ResultQuery.new(search_results).for('Chapter')
- .uniq_by('goods_nomenclature_item_id')
- .sort_by('goods_nomenclature_item_id')
- .all
- end
-
- def headings
- @headings ||= ResultQuery.new(search_results).for('Heading')
- .uniq_by('goods_nomenclature_item_id')
- .sort_by('goods_nomenclature_item_id')
- .all
- end
-
- def commodities
- @commodities ||= ResultQuery.new(search_results).for('Commodity')
- .uniq_by('goods_nomenclature_item_id')
- .sort_by('goods_nomenclature_item_id')
- .all
- end
-
- def serializable_hash
- {
- sections: sections,
- chapters: chapters,
- headings: headings,
- commodities: commodities
- }
- end
-
- private
-
- def search_results
- @search_results ||= TradeTariffBackend.search_client.search(
- index: 'tariff-search_references', body: {
- query: {
- filtered: {
- query: {
- query_string: {
- fields: ['title'],
- analyzer: 'snowball',
- query: query_string
- }
- },
- filter: {
- nested: {
- path: "reference",
- query: {
- filtered: {
- query: { match_all: {} },
- filter: {
- or: [
- # actual date is either between item's (validity_start_date..validity_end_date)
- {
- and: [
- range: {
- "reference.validity_start_date" => { lte: date }
- },
- range: {
- "reference.validity_end_date" => { gte: date }
- }
- ]
- },
- # or is greater than item's validity_start_date
- # and item has blank validity_end_date (is unbounded)
- {
- and: [
- {
- range: {
- "reference.validity_start_date" => { lte: date }
- }
- },
- {
- missing: {
- field: "reference.validity_end_date",
- null_value: true,
- existence: true
- }
- }
- ]
- },
- # Sections do not have validity start/end dates
- {
- and: [
- {
- missing: {
- field: "reference.validity_start_date",
- null_value: true,
- existence: true
- }
- },
- {
- missing: {
- field: "reference.validity_end_date",
- null_value: true,
- existence: true
- }
- }
- ]
- }
- ]
- }
- }
- }
- }
- }
- },
- },
- size: INDEX_SIZE_MAX
- }
- ).hits.hits
- end
- end
- end
-end
View
41 app/services/search_service/fuzzy_search/reference_match/result_query.rb
@@ -1,41 +0,0 @@
-class SearchService
- class FuzzySearch < BaseSearch
- class ReferenceMatch
- class ResultQuery
- attr_reader :result
-
- def initialize(results = [])
- @results = results.clone
- end
-
- def for(entity_name)
- @results = @results.select { |result|
- result._source.reference['class'] == entity_name
- }
-
- self
- end
-
- def uniq_by(uniq_key)
- @results = @results.uniq { |result|
- result._source.reference[uniq_key]
- }
-
- self
- end
-
- def sort_by(sort_key)
- @results = @results.sort_by { |result|
- result._source.reference[sort_key]
- }
-
- self
- end
-
- def all
- @results
- end
- end
- end
- end
-end
View
95 app/services/search_service/fuzzy_search/reference_query.rb
@@ -0,0 +1,95 @@
+class SearchService
+ class FuzzySearch < BaseSearch
+ class ReferenceQuery < FuzzyQuery
+ def query(*)
+ {
+ index: "#{TradeTariffBackend.search_namespace}-search_references",
+ type: 'search_reference',
+ search: {
+ query: {
+ filtered: {
+ query: {
+ query_string: {
+ fields: ['title'],
+ analyzer: 'snowball',
+ query: query_string
+ }
+ },
+ filter: {
+ and: [
+ {
+ term: { reference_class: index.type }
+ },
+ {
+ nested: {
+ path: "reference",
+ query: {
+ filtered: {
+ query: { match_all: {} },
+ filter: {
+ or: [
+ # actual date is either between item's (validity_start_date..validity_end_date)
+ {
+ and: [
+ range: {
+ "reference.validity_start_date" => { lte: date }
+ },
+ range: {
+ "reference.validity_end_date" => { gte: date }
+ }
+ ]
+ },
+ # or is greater than item's validity_start_date
+ # and item has blank validity_end_date (is unbounded)
+ {
+ and: [
+ {
+ range: {
+ "reference.validity_start_date" => { lte: date }
+ }
+ },
+ {
+ missing: {
+ field: "reference.validity_end_date",
+ null_value: true,
+ existence: true
+ }
+ }
+ ]
+ },
+ # Sections do not have validity start/end dates
+ {
+ and: [
+ {
+ missing: {
+ field: "reference.validity_start_date",
+ null_value: true,
+ existence: true
+ }
+ },
+ {
+ missing: {
+ field: "reference.validity_end_date",
+ null_value: true,
+ existence: true
+ }
+ }
+ ]
+ }
+ ]
+ }
+ }
+ }
+ }
+ }
+ ]
+ }
+ },
+ },
+ size: INDEX_SIZE_MAX
+ }
+ }
+ end
+ end
+ end
+end
View
6 lib/trade_tariff_backend.rb
@@ -109,6 +109,12 @@ def indexed_models
[Chapter, Commodity, Heading, SearchReference, Section]
end
+ def search_indexes
+ indexed_models.map { |model|
+ "#{model}Index".constantize.new(search_namespace)
+ }
+ end
+
def model_serializer_for(model)
"#{model}Serializer".constantize
end
View
4 lib/trade_tariff_backend/search_client.rb
@@ -18,6 +18,10 @@ def search(*)
Hashie::Mash.new(super)
end
+ def msearch(*) # forwards all arguments unchanged to the parent implementation
+ Hashie::Mash.new(super) # wrapped like #search, so callers can use method-style access such as .responses
+ end
+
def reindex
indexed_models.each do |model|
search_index_for(model).tap do |index|
Please sign in to comment.
Something went wrong with that request. Please try again.