Skip to content

Commit

Permalink
Add support for Opensearch 2.12; Bring field analyzers in line with b10
Browse files (browse the repository at this point in the history)
  • Loading branch information
akotlar committed Mar 10, 2024
1 parent 364dde1 commit 159bdcf
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 26 deletions.
116 changes: 103 additions & 13 deletions config/hg19.mapping.yml
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,6 +29,31 @@ index_settings:
- uppercase
- asciifolding
filter:
exclude_pathogenic:
type: pattern_capture
patterns: ["conflicting_interpretations_of_pathogenicity"]
catenate_filter:
type: word_delimiter
catenate_words: true
catenate_numbers: true
catenate_all: true
preserve_original: false
generate_word_parts: false
stem_english_possessive: true
generate_number_parts: false
split_on_numerics: false
split_on_case_change: false
catenate_filter_split:
type: word_delimiter
catenate_words: true
catenate_numbers: true
catenate_all: true
preserve_original: false
generate_word_parts: true
stem_english_possessive: true
generate_number_parts: false
split_on_numerics: false
split_on_case_change: true
english_stemmer:
type: stemmer
language: light_english
Expand Down Expand Up @@ -71,6 +96,8 @@ index_settings:
- digit
- punctuation
tokenizer:
hgvs_tokenizer:
type: pattern
edge_ngram_tokenizer:
type: edge_ngram
min_gram: 1
Expand All @@ -80,39 +107,54 @@ index_settings:
- digit
- punctuation
analyzer:
hgvs_analyzer:
type: custom
tokenizer: hgvs_tokenizer
filter:
- lowercase
autocomplete_english:
type: custom
tokenizer: classic
tokenizer: whitespace
filter:
- lowercase
- asciifolding
- catenate_filter
- english_stemmer
- autocomplete_filter
autocomplete_english_graph:
autocomplete_english_split:
type: custom
tokenizer: keyword
tokenizer: whitespace
filter:
- word_delimiter_graph
- lowercase
- asciifolding
- catenate_filter_split
- english_stemmer
- autocomplete_filter
english_graph:
autocomplete_english_split_clinsig:
type: custom
tokenizer: whitespace
filter:
- lowercase
- exclude_pathogenic
- asciifolding
- catenate_filter_split
autocomplete_english_graph:
type: custom
tokenizer: keyword
filter:
- word_delimiter_graph
- lowercase
- asciifolding
- english_stemmer
autocomplete_english_split:
- autocomplete_filter
english_graph:
type: custom
tokenizer: standard
tokenizer: keyword
filter:
- word_delimiter_graph
- lowercase
- asciifolding
- english_stemmer
- autocomplete_filter
autocomplete_english_letter:
type: custom
tokenizer: letter
Expand Down Expand Up @@ -319,6 +361,10 @@ mappings:
type: text
analyzer: autocomplete_english_split
search_analyzer: search_english_description_synonyms
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
rfamAcc:
type: keyword
normalizer: uppercase_normalizer
Expand All @@ -333,10 +379,18 @@ mappings:
type: text
analyzer: autocomplete_english_split
search_analyzer: search_english_description_synonyms
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
clinicalSignificance:
type: text
analyzer: autocomplete_english_split
search_analyzer: search_english_split
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
type:
type: text
analyzer: autocomplete_english
Expand All @@ -349,12 +403,20 @@ mappings:
type: text
analyzer: autocomplete_english_split
search_analyzer: search_english_split
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
numberSubmitters:
type: short
reviewStatus:
type: text
analyzer: autocomplete_english_split
search_analyzer: search_english_split
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
chromStart:
type: integer
chromEnd:
Expand Down Expand Up @@ -428,22 +490,50 @@ mappings:
normalizer: lowercase_normalizer
CLNSIG:
type: text
analyzer: autocomplete_english_graph
analyzer: autocomplete_english_split_clinsig
search_analyzer: search_english_split
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
CLNSIGCONF:
type: text
analyzer: autocomplete_english_graph
analyzer: autocomplete_english_split
search_analyzer: search_english_split
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
CLNREVSTAT:
type: text
analyzer: english_graph
analyzer: autocomplete_english_split
search_analyzer: search_english_description_synonyms
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
CLNDN:
type: text
analyzer: autocomplete_english_graph
analyzer: autocomplete_english_split
search_analyzer: search_english_description_synonyms
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
CLNDNINCL:
type: text
analyzer: autocomplete_english_graph
analyzer: search_english_split
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
CLNHGVS:
type: text
analyzer: english_graph
fields:
exact:
type: keyword
normalizer: lowercase_normalizer
DBVARID:
type: keyword
normalizer: lowercase_normalizer
Expand Down
9 changes: 5 additions & 4 deletions config/opensearch.yml
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
connection:
request_timeout: 300
request_timeout: 1200
nodes:
- host: '10.98.135.70'
- host: 'host'
port: '9200'
scheme: 'http'
scheme: 'https'
auth:
dummy: True
username: 'admin'
password: 'admin'
3 changes: 2 additions & 1 deletion go/opensearch/connection/connection.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -30,7 +30,6 @@ type OpensearchConnectionConfig struct {
Nodes []OpensearchNode `yaml:"nodes"`
} `yaml:"connection"`
Auth struct {
Dummy bool `yaml:"dummy"`
Username string `yaml:"username"`
Password string `yaml:"password"`
}
Expand Down Expand Up @@ -108,6 +107,8 @@ func CreateIndex(opensearchConnectionConfigPath string, opensearchIndexConfigPat
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
},
Addresses: createAddresses(osearchConnConfig),
Username: osearchConnConfig.Auth.Username,
Password: osearchConnConfig.Auth.Password,
MaxRetries: 5,
RetryOnStatus: []int{502, 503, 504},
}
Expand Down
2 changes: 1 addition & 1 deletion install/install-go-packages.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@ go mod init bystro

go install github.com/akotlar/bystro-stats@1.0.0;

go install github.com/bystrogenomics/bystro-vcf@2.1.0;
go install github.com/bystrogenomics/bystro-vcf@2.1.1;

go install github.com/akotlar/bystro-snp@1.0.0;

Expand Down
28 changes: 21 additions & 7 deletions python/python/bystro/search/utils/opensearch.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,16 +8,30 @@ def gather_opensearch_args(search_conf: dict):
When hosts are prefixed https://, http_auth, client_cert, client_key, ca_certs,
verify_certs, ssl_assert_hostname, and ssl_show_warn will be in effect
"""
http_auth = None
client_cert = None
client_key = None
ca_certs = None

if "auth" in search_conf:
username = search_conf["auth"]["username"]
password = search_conf["auth"]["password"]
http_auth = (username, password)
client_cert = search_conf["auth"].get("client_cert_path")
client_key = search_conf["auth"].get("client_key_path")
ca_certs = search_conf["auth"].get("ca_certs_path")

return dict(
hosts=list(search_conf["connection"]["nodes"]),
http_compress=True,
timeout=search_conf["connection"].get("request_timeout", 600),
http_auth=search_conf["auth"].get("auth"),
client_cert=search_conf["auth"].get("client_cert_path"),
client_key=search_conf["auth"].get("client_key_path"),
ca_certs=search_conf["auth"].get("ca_certs_path"),
verify_certs=True,
timeout=search_conf["connection"].get("request_timeout", 1200),
http_auth=http_auth,
client_cert=client_cert,
client_key=client_key,
ca_certs=ca_certs,
use_ssl=search_conf["connection"].get("use_ssl", False),
verify_certs=search_conf["connection"].get("verify_certs", False),
ssl_assert_hostname=True,
ssl_show_warn=True,
pool_maxsize = 16
pool_maxsize=16,
)

0 comments on commit 159bdcf

Please sign in to comment.