Skip to content

Commit

Permalink
fix: str issues in squad_to_dpr (#4826)
Browse files Browse the repository at this point in the history
* fix #4754

* fix #4753

* run black formatting

---------

Co-authored-by: Julian Risch <julian.risch@deepset.ai>
  • Loading branch information
PhilipMay and julian-risch committed May 7, 2023
1 parent 705a2c0 commit 2ff8b0d
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
4 changes: 3 additions & 1 deletion haystack/document_stores/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ def _extract_docs_and_labels_from_dict(
)
labels.append(label)
else:
- ans_position = cur_full_doc.content[answer["answer_start"] : answer["answer_start"] + len(ans)]
+ ans_position = cur_full_doc.content[
+ answer["answer_start"] : answer["answer_start"] + len(str(ans))
+ ]
if ans != ans_position:
# do not use answer
problematic_ids.append(qa.get("id", "missing"))
Expand Down
2 changes: 1 addition & 1 deletion haystack/utils/squad_to_dpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def get_hard_negative_contexts(retriever: BaseRetriever, question: str, answers:
for retrieved_doc in retrieved_docs:
retrieved_doc_id = retrieved_doc.meta.get("name", "")
retrieved_doc_text = retrieved_doc.content
- if any(answer.lower() in retrieved_doc_text.lower() for answer in answers):
+ if any(str(answer).lower() in retrieved_doc_text.lower() for answer in answers):
continue
list_hard_neg_ctxs.append({"title": retrieved_doc_id, "text": retrieved_doc_text, "passage_id": ""})

Expand Down

0 comments on commit 2ff8b0d

Please sign in to comment.