Skip to content

Commit

Permalink
Updated ES term index creation
Browse files Browse the repository at this point in the history
  • Loading branch information
wshayes committed Sep 6, 2018
1 parent 32c8749 commit 0a8024a
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 39 deletions.
18 changes: 13 additions & 5 deletions bel/db/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
log = logging.getLogger(__name__)

cur_dir_name = os.path.dirname(os.path.realpath(__file__))
mapping_terms_fn = f'{cur_dir_name}/es_mapping_terms.yml'

mappings_terms_fn = f'{cur_dir_name}/es_mappings_terms.yml'
settings_terms_fn = f'{cur_dir_name}/es_settings_terms.yml'
terms_alias = 'terms'


Expand Down Expand Up @@ -51,11 +51,19 @@ def delete_index(es, index_name: str):
def create_terms_index(es, index_name: str):
"""Create terms index"""

with open(mapping_terms_fn, 'r') as f:
mapping_terms = yaml.load(f)
with open(mappings_terms_fn, 'r') as f:
mappings_terms = yaml.load(f)

with open(settings_terms_fn, 'r') as f:
settings_terms = yaml.load(f)

try:
es.indices.create(index=index_name, body=mapping_terms)
es.indices.create(index=index_name, body=mappings_terms)

# Update settings - have to close before and then open after
es.indices.close(index=index_name)
es.indices.put_settings(index=index_name, body=settings_terms)
es.indices.open(index=index_name)

except Exception as e:
log.error(f'Could not create elasticsearch terms index: {e}')
Expand Down
34 changes: 0 additions & 34 deletions bel/db/es_mapping_terms.yml → bel/db/es_mappings_terms.yml
Original file line number Diff line number Diff line change
@@ -1,37 +1,3 @@
settings:
analysis:
filter:
autocomplete_filter:
type: edge_ngram
min_gram: 1
max_gram: 20
analyzer:
autocomplete:
type: custom
tokenizer: autocomplete_tokenizer
filter:
- lowercase
- autocomplete_filter
autocomplete_search:
type: custom
tokenizer: autocomplete_tokenizer
filter:
- lowercase

# Added simple pattern split to allow SP:H4_HUMAN in alt_ids to be matched
# This will also allow IL-6 to be completed against. The standard tokenizer/analyzer
# broke it into IL and 6
tokenizer:
autocomplete_tokenizer:
type: simple_pattern_split
pattern: " |:"
normalizer:
lowercase:
type: custom
char_filter: []
filter:
- lowercase

mappings:
term:
properties:
Expand Down
33 changes: 33 additions & 0 deletions bel/db/es_settings_terms.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
settings:
analysis:
filter:
autocomplete_filter:
type: edge_ngram
min_gram: 1
max_gram: 20
analyzer:
autocomplete:
type: custom
tokenizer: autocomplete_tokenizer
filter:
- lowercase
- autocomplete_filter
autocomplete_search:
type: custom
tokenizer: autocomplete_tokenizer
filter:
- lowercase

# Use a simple pattern split tokenizer so that SP:H4_HUMAN in alt_ids can be matched.
# This also allows IL-6 to be autocompleted; the standard tokenizer/analyzer
# broke it into IL and 6.
tokenizer:
autocomplete_tokenizer:
type: simple_pattern_split
pattern: " |:"
normalizer:
lowercase:
type: custom
char_filter: []
filter:
- lowercase

0 comments on commit 0a8024a

Please sign in to comment.