Skip to content

Commit

Permalink
Rename label id field for elastic & add UPDATE_EXISTING_DOCUMENTS to API config (#728)
Browse files (browse the repository at this point in the history)

* rename label id field for elastic

* add UPDATE_EXISTING_DOCUMENTS param to API config
  • Loading branch information
tholor committed Jan 12, 2021
1 parent b6e64ca commit e9b5439
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 3 deletions.
4 changes: 4 additions & 0 deletions haystack/document_store/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@ def write_labels(self, labels: Union[List[Label], List[dict]], index: Optional[s
**label.to_dict()
} # type: Dict[str, Any]

# rename id for elastic
if label.id is not None:
_label["_id"] = str(_label.pop("id"))

labels_to_index.append(_label)
bulk(self.client, labels_to_index, request_timeout=300, refresh=self.refresh_type)

Expand Down
1 change: 1 addition & 0 deletions rest_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", 768))
VECTOR_SIMILARITY_METRIC = os.getenv("VECTOR_SIMILARITY_METRIC", "dot_product")
CREATE_INDEX = os.getenv("CREATE_INDEX", "True").lower() == "true"
UPDATE_EXISTING_DOCUMENTS = os.getenv("UPDATE_EXISTING_DOCUMENTS", "False").lower() == "true"

# Reader
READER_MODEL_PATH = os.getenv("READER_MODEL_PATH", "deepset/roberta-base-squad2")
Expand Down
4 changes: 3 additions & 1 deletion rest_api/controller/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
EXCLUDE_META_DATA_FIELDS,
FAQ_QUESTION_FIELD_NAME,
CREATE_INDEX,
- VECTOR_SIMILARITY_METRIC
+ VECTOR_SIMILARITY_METRIC,
+ UPDATE_EXISTING_DOCUMENTS
)

router = APIRouter()
Expand All @@ -41,6 +42,7 @@
embedding_field=EMBEDDING_FIELD_NAME,
excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore
create_index=CREATE_INDEX,
update_existing_documents=UPDATE_EXISTING_DOCUMENTS,
similarity=VECTOR_SIMILARITY_METRIC
)

Expand Down
3 changes: 2 additions & 1 deletion rest_api/controller/file_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from rest_api.config import DB_HOST, DB_PORT, DB_USER, DB_PW, DB_INDEX, DB_INDEX_FEEDBACK, ES_CONN_SCHEME, TEXT_FIELD_NAME, \
SEARCH_FIELD_NAME, FILE_UPLOAD_PATH, EMBEDDING_DIM, EMBEDDING_FIELD_NAME, EXCLUDE_META_DATA_FIELDS, VALID_LANGUAGES, \
FAQ_QUESTION_FIELD_NAME, REMOVE_NUMERIC_TABLES, REMOVE_WHITESPACE, REMOVE_EMPTY_LINES, REMOVE_HEADER_FOOTER, \
- CREATE_INDEX, VECTOR_SIMILARITY_METRIC
+ CREATE_INDEX, UPDATE_EXISTING_DOCUMENTS, VECTOR_SIMILARITY_METRIC
from haystack.document_store.elasticsearch import ElasticsearchDocumentStore
from haystack.file_converter.pdf import PDFToTextConverter
from haystack.file_converter.txt import TextConverter
Expand All @@ -39,6 +39,7 @@
excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore
faq_question_field=FAQ_QUESTION_FIELD_NAME,
create_index=CREATE_INDEX,
update_existing_documents=UPDATE_EXISTING_DOCUMENTS,
similarity=VECTOR_SIMILARITY_METRIC
)

Expand Down
3 changes: 2 additions & 1 deletion rest_api/controller/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
RETRIEVER_TYPE, EMBEDDING_MODEL_PATH, USE_GPU, READER_MODEL_PATH, BATCHSIZE, CONTEXT_WINDOW_SIZE, \
TOP_K_PER_CANDIDATE, NO_ANS_BOOST, READER_CAN_HAVE_NO_ANSWER, MAX_PROCESSES, MAX_SEQ_LEN, DOC_STRIDE, \
CONCURRENT_REQUEST_PER_WORKER, FAQ_QUESTION_FIELD_NAME, EMBEDDING_MODEL_FORMAT, READER_TYPE, READER_TOKENIZER, \
- GPU_NUMBER, NAME_FIELD_NAME, VECTOR_SIMILARITY_METRIC, CREATE_INDEX, LOG_LEVEL
+ GPU_NUMBER, NAME_FIELD_NAME, VECTOR_SIMILARITY_METRIC, CREATE_INDEX, LOG_LEVEL, UPDATE_EXISTING_DOCUMENTS

from rest_api.controller.request import Question
from rest_api.controller.response import Answers, AnswersToIndividualQuestion
Expand Down Expand Up @@ -52,6 +52,7 @@
excluded_meta_data=EXCLUDE_META_DATA_FIELDS, # type: ignore
faq_question_field=FAQ_QUESTION_FIELD_NAME,
create_index=CREATE_INDEX,
update_existing_documents=UPDATE_EXISTING_DOCUMENTS,
similarity=VECTOR_SIMILARITY_METRIC
)

Expand Down

0 comments on commit e9b5439

Please sign in to comment.