Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Evaluation Dataset #233

Merged
merged 2 commits into from
Jul 15, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 3 additions & 3 deletions tutorials/Tutorial5_Evaluation.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1639,7 +1639,7 @@
"\n",
"# Download evaluation data, which is a subset of Natural Questions development set containing 50 documents\n",
"doc_dir = \"../data/nq\"\n",
"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip\"\n",
"s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip\"\n",
"fetch_archive_from_http(url=s3_url, output_dir=doc_dir)"
],
"execution_count": 7,
Expand Down Expand Up @@ -1701,7 +1701,7 @@
},
"source": [
"# Add evaluation data to Elasticsearch database\n",
"document_store.add_eval_data(\"../data/nq/nq_dev_subset.json\")"
"document_store.add_eval_data(\"../data/nq/nq_dev_subset_v2.json\")"
],
"execution_count": 9,
"outputs": [
Expand Down Expand Up @@ -2283,7 +2283,7 @@
"\n",
"# Evaluation of Reader can also be done directly on a SQuAD-formatted file\n",
"# without passing the data to Elasticsearch\n",
"#reader_eval_results = reader.eval_on_file(\"../data/natural_questions\", \"dev_subset.json\", device=device)\n",
"#reader_eval_results = reader.eval_on_file(\"../data/nq\", \"nq_dev_subset_v2.json\", device=device)\n",
"\n",
"## Reader Top-N-Recall is the proportion of predicted answers that overlap with their corresponding correct answer\n",
"print(\"Reader Top-N-Recall:\", reader_eval_results[\"top_n_recall\"])\n",
Expand Down
8 changes: 4 additions & 4 deletions tutorials/Tutorial5_Evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,17 @@

# Download evaluation data, which is a subset of Natural Questions development set containing 50 documents
doc_dir = "../data/nq"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip"
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)

# Connect to Elasticsearch
document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document", create_index=False)
# Add evaluation data to Elasticsearch database
if LAUNCH_ELASTICSEARCH:
document_store.add_eval_data("../data/nq/nq_dev_subset.json")
document_store.add_eval_data("../data/nq/nq_dev_subset_v2.json")
else:
logger.warning("Since we already have a running ES instance we should not index the same documents again."
"If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset.json')' manually ")
"If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset_v2.json')' manually ")

# Initialize Retriever
retriever = ElasticsearchRetriever(document_store=document_store)
Expand All @@ -74,7 +74,7 @@
if eval_reader_only:
reader_eval_results = reader.eval(document_store=document_store, device=device)
# Evaluation of Reader can also be done directly on a SQuAD-formatted file without passing the data to Elasticsearch
#reader_eval_results = reader.eval_on_file("../data/natural_questions", "dev_subset.json", device=device)
#reader_eval_results = reader.eval_on_file("../data/nq", "nq_dev_subset_v2.json", device=device)

## Reader Top-N-Accuracy is the proportion of predicted answers that match with their corresponding correct answer
print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])
Expand Down