-
Notifications
You must be signed in to change notification settings - Fork 6
/
analysis.rb
67 lines (56 loc) · 2.18 KB
/
analysis.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
module Estella
  # Default Elasticsearch analysers, field mappings and index settings.
  #
  # Everything here is plain constant data (nested hashes with symbol keys and
  # string values). The names used as strings inside the hashes
  # ('standard_tokenizer', 'front_ngram_filter', '*_analyzer') refer to the
  # entries registered in DEFAULT_ANALYSIS below.
  module Analysis
    extend ActiveSupport::Concern

    # Token filter definition: 'edgeNGram' type, grams of 2..15 characters,
    # taken from the 'front' side.
    FRONT_NGRAM_FILTER =
      { type: 'edgeNGram', min_gram: 2, max_gram: 15, side: 'front' }

    # Baseline analyzer: standard tokenizer, then lowercase + asciifolding.
    DEFAULT_ANALYZER =
      { type: 'custom', tokenizer: 'standard_tokenizer', filter: %w[lowercase asciifolding] }

    # Baseline filters followed by the 'snowball' filter.
    SNOWBALL_ANALYZER =
      { type: 'custom', tokenizer: 'standard_tokenizer', filter: %w[lowercase asciifolding snowball] }

    # 'shingle' filter applied first, then the baseline filters.
    SHINGLE_ANALYZER =
      { type: 'custom', tokenizer: 'standard_tokenizer', filter: %w[shingle lowercase asciifolding] }

    # Baseline filters followed by the custom front_ngram_filter.
    NGRAM_ANALYZER =
      { type: 'custom', tokenizer: 'standard_tokenizer', filter: %w[lowercase asciifolding front_ngram_filter] }

    # Uses the 'uax_url_email' tokenizer with lowercase + stop filters.
    EMAIL_ANALYZER =
      { type: 'custom', tokenizer: 'uax_url_email', filter: %w[lowercase stop] }

    # The `analysis` section of the index settings: registers the custom
    # tokenizer, the custom filter and every analyzer under the names that the
    # field mappings in DEFAULT_FIELDS refer to.
    DEFAULT_ANALYSIS = {
      tokenizer: {
        standard_tokenizer: { type: 'standard' }
      },
      filter: {
        front_ngram_filter: FRONT_NGRAM_FILTER
      },
      analyzer: {
        default_analyzer: DEFAULT_ANALYZER,
        snowball_analyzer: SNOWBALL_ANALYZER,
        shingle_analyzer: SHINGLE_ANALYZER,
        ngram_analyzer: NGRAM_ANALYZER,
        email_analyzer: EMAIL_ANALYZER,
        # Same definition as default_analyzer (no n-gram filter); referenced as
        # the search_analyzer of the :ngram field below.
        search_analyzer: DEFAULT_ANALYZER
      }
    }

    # Text field mapping for each analysis kind, keyed by kind.
    DEFAULT_FIELDS = {
      default: { type: 'text', analyzer: 'default_analyzer' },
      snowball: { type: 'text', analyzer: 'snowball_analyzer' },
      shingle: { type: 'text', analyzer: 'shingle_analyzer' },
      ngram: { type: 'text', analyzer: 'ngram_analyzer', search_analyzer: 'search_analyzer' },
      email: { type: 'text', analyzer: 'email_analyzer' }
    }

    # Numeric factor per analysis kind.
    # NOTE(review): presumably relative boost weights applied by the query
    # code, which lives outside this file - confirm against the consumer.
    # The :search key has no counterpart in DEFAULT_FIELDS.
    DEFAULT_FIELD_FACTORS = {
      default: 10,
      ngram: 10,
      snowball: 3,
      shingle: 2,
      search: 2,
      email: 2
    }

    # Names of all analysis kinds that have a field mapping:
    # [:default, :snowball, :shingle, :ngram, :email].
    FULLTEXT_ANALYSIS = DEFAULT_FIELDS.keys

    # Index settings, chosen once when this file is loaded.
    #
    # The parentheses around `Rails` are required: without them,
    # `defined? Rails && ...` parses as `defined?(Rails && ...)`, which always
    # yields the truthy string "expression" and would select the test settings
    # in every environment, including when Rails is not loaded.
    DEFAULT_SETTINGS = if defined?(Rails) && Rails.env == 'test'
                         # Ensure no sharding in test env in order to enforce deterministic scores.
                         { analysis: DEFAULT_ANALYSIS, index: { number_of_shards: 1, number_of_replicas: 1 } }
                       else
                         { analysis: DEFAULT_ANALYSIS }
                       end
  end
end