Skip to content

Commit

Permalink
Fix datastore create api
Browse files: browse the repository at this point in the history
  • Loading branch information
chiragjn committed Sep 20, 2018
1 parent bc6e3fa commit fd5837c
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 23 deletions.
49 changes: 31 additions & 18 deletions datastore/datastore.py
@@ -1,3 +1,5 @@
import collections

import elastic_search
from chatbot_ner.config import ner_logger, CHATBOT_NER_DATASTORE
from lib.singleton import Singleton
Expand Down Expand Up @@ -100,12 +102,24 @@ def create(self, **kwargs):

if self._engine == ELASTICSEARCH:
self._check_doc_type_for_elasticsearch()
elastic_search.create.create_index(connection=self._client_or_connection,
index_name=self._store_name,
doc_type=self._connection_settings[ELASTICSEARCH_DOC_TYPE],
logger=ner_logger,
ignore=[400, 404],
**kwargs)
elastic_search.create.create_entity_index(connection=self._client_or_connection,
index_name=self._store_name,
doc_type=self._connection_settings[ELASTICSEARCH_DOC_TYPE],
logger=ner_logger,
ignore=[400, 404],
**kwargs)
crf_data_index = self._connection_settings.get(ELASTICSEARCH_CRF_DATA_INDEX_NAME)
if crf_data_index is not None:
self._check_doc_type_for_crf_data_elasticsearch()

elastic_search.create.create_crf_index(
connection=self._client_or_connection,
index_name=crf_data_index,
doc_type=self._connection_settings[ELASTICSEARCH_CRF_DATA_DOC_TYPE],
logger=ner_logger,
ignore=[400, 404],
**kwargs
)

def populate(self, entity_data_directory_path=DEFAULT_ENTITY_DATA_DIRECTORY, csv_file_paths=None, **kwargs):
"""
Expand Down Expand Up @@ -249,6 +263,7 @@ def get_similar_dictionary(self, entity_name, text, fuzziness_threshold="auto:4,
u'mumbai': u'mumbai',
u'pune': u'pune'}
"""
results_dictionary = collections.OrderedDict()
if self._client_or_connection is None:
self._connect()
if self._engine == ELASTICSEARCH:
Expand Down Expand Up @@ -323,6 +338,7 @@ def repopulate(self, entity_data_directory_path=DEFAULT_ENTITY_DATA_DIRECTORY, c
logger=ner_logger,
ignore=[400, 404],
**kwargs)
# TODO: repopulate logic for the CRF index is still missing

def _check_doc_type_for_elasticsearch(self):
"""
Expand Down Expand Up @@ -381,7 +397,7 @@ def update_entity_data(self, entity_name, entity_data, language_script, **kwargs
elastic_search.populate.entity_data_update(connection=self._client_or_connection,
index_name=update_index,
doc_type=self._connection_settings[
ELASTICSEARCH_DOC_TYPE],
ELASTICSEARCH_DOC_TYPE],
logger=ner_logger,
entity_data=entity_data,
entity_name=entity_name,
Expand Down Expand Up @@ -443,22 +459,19 @@ def get_crf_data_for_entity_name(self, entity_name, **kwargs):
self._connect()
results_dictionary = {}
if self._engine == ELASTICSEARCH:
self._check_doc_type_for_crf_data_elasticsearch()

es_training_index = self._connection_settings.get(ELASTICSEARCH_CRF_DATA_INDEX_NAME)
if es_training_index is None:
raise IndexNotFoundException('Index for ELASTICSEARCH_CRF_DATA_INDEX_NAME not found. '
'Please configure the same')

self._check_doc_type_for_crf_data_elasticsearch()
request_timeout = self._connection_settings.get('request_timeout', 20)
results_dictionary = elastic_search.query.get_crf_data_for_entity_name(connection=self._client_or_connection,
index_name=es_training_index,
doc_type=
self._connection_settings
[ELASTICSEARCH_CRF_DATA_DOC_TYPE],
entity_name=entity_name,
request_timeout=request_timeout,
**kwargs)
results_dictionary = elastic_search.query.get_crf_data_for_entity_name(
connection=self._client_or_connection,
index_name=es_training_index,
doc_type=self._connection_settings[ELASTICSEARCH_CRF_DATA_DOC_TYPE],
entity_name=entity_name,
request_timeout=request_timeout,
**kwargs)
ner_logger.debug('Datastore, get_entity_training_data, results_dictionary %s' % str(entity_name))
return results_dictionary

Expand Down
7 changes: 4 additions & 3 deletions datastore/elastic_search/create.py
Expand Up @@ -27,14 +27,15 @@ def delete_index(connection, index_name, logger, **kwargs):
logger.exception('%s: Exception in deleting index %s ' % (log_prefix, e))


def create_index(connection, index_name, doc_type, logger, mapping_body, **kwargs):
def _create_index(connection, index_name, doc_type, logger, mapping_body, **kwargs):
"""
Creates an Elasticsearch index needed for similarity based searching
Args:
connection: Elasticsearch client object
index_name: The name of the index
doc_type: The type of the documents that will be indexed
logger: logging object to log at debug and exception level
mapping_body: dict, mappings to put on the index
kwargs:
master_timeout: Specify timeout for connection to master
timeout: Explicit operation timeout
Expand Down Expand Up @@ -143,7 +144,7 @@ def create_entity_index(connection, index_name, doc_type, logger, **kwargs):
}
}

create_index(connection, index_name, doc_type, logger, mapping_body, **kwargs)
_create_index(connection, index_name, doc_type, logger, mapping_body, **kwargs)


def create_crf_index(connection, index_name, doc_type, logger, **kwargs):
Expand Down Expand Up @@ -189,7 +190,7 @@ def create_crf_index(connection, index_name, doc_type, logger, **kwargs):
}
}

create_index(connection, index_name, doc_type, logger, mapping_body, **kwargs)
_create_index(connection, index_name, doc_type, logger, mapping_body, **kwargs)


def create_alias(connection, index_list, alias_name, logger, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion datastore/elastic_search/query.py
Expand Up @@ -244,7 +244,7 @@ def _parse_es_search_results(results):
u'_type': u'data_dictionary',
u'highlight': {u'variants': [u'<em>goa</em>']}},
{u'_id': u'AVrW02W99WNuMIY9vmcf',
u'_index': u'gogo_entity_data',
u'_index': u'entity_data',
u'_score': 11.210829,
u'_source': {u'dict_type': u'variants',
u'entity_data': u'city',
Expand Down
4 changes: 3 additions & 1 deletion initial_setup.py
@@ -1,7 +1,8 @@
import os
import nltk
import time

import nltk

BASE_DIR = os.path.dirname(__file__)

print "Downloading nltk corpus: punkt ..."
Expand Down Expand Up @@ -33,6 +34,7 @@
# POPULATING DATASTORE
# Comment out entire section if you want to reuse existing data
from datastore import DataStore

db = DataStore()
print "Setting up DataStore for Chatbot NER"
print "Deleting any stale data ..."
Expand Down

0 comments on commit fd5837c

Please sign in to comment.