deepset-ai · brandenchan · Jul 15, 2020 · Jul 15, 2020 · Jul 15, 2020
diff --git a/tutorials/Tutorial5_Evaluation.ipynb b/tutorials/Tutorial5_Evaluation.ipynb
@@ -1639,7 +1639,7 @@
     "\n",
     "# Download evaluation data, which is a subset of Natural Questions development set containing 50 documents\n",
     "doc_dir = \"../data/nq\"\n",
-    "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip\"\n",
+    "s3_url = \"https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip\"\n",
     "fetch_archive_from_http(url=s3_url, output_dir=doc_dir)"
    ],
    "execution_count": 7,
@@ -1701,7 +1701,7 @@
    },
    "source": [
     "# Add evaluation data to Elasticsearch database\n",
-    "document_store.add_eval_data(\"../data/nq/nq_dev_subset.json\")"
+    "document_store.add_eval_data(\"../data/nq/nq_dev_subset_v2.json\")"
    ],
    "execution_count": 9,
    "outputs": [
@@ -2283,7 +2283,7 @@
     "\n",
     "# Evaluation of Reader can also be done directly on a SQuAD-formatted file\n",
     "# without passing the data to Elasticsearch\n",
-    "#reader_eval_results = reader.eval_on_file(\"../data/natural_questions\", \"dev_subset.json\", device=device)\n",
+    "#reader_eval_results = reader.eval_on_file(\"../data/nq\", \"nq_dev_subset_v2.json\", device=device)\n",
     "\n",
     "## Reader Top-N-Recall is the proportion of predicted answers that overlap with their corresponding correct answer\n",
     "print(\"Reader Top-N-Recall:\", reader_eval_results[\"top_n_recall\"])\n",

diff --git a/tutorials/Tutorial5_Evaluation.py b/tutorials/Tutorial5_Evaluation.py
@@ -39,17 +39,17 @@
 
 # Download evaluation data, which is a subset of Natural Questions development set containing 50 documents
 doc_dir = "../data/nq"
-s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset.json.zip"
+s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/nq_dev_subset_v2.json.zip"
 fetch_archive_from_http(url=s3_url, output_dir=doc_dir)
 
 # Connect to Elasticsearch
 document_store = ElasticsearchDocumentStore(host="localhost", username="", password="", index="document", create_index=False)
 # Add evaluation data to Elasticsearch database
 if LAUNCH_ELASTICSEARCH:
-    document_store.add_eval_data("../data/nq/nq_dev_subset.json")
+    document_store.add_eval_data("../data/nq/nq_dev_subset_v2.json")
 else:
     logger.warning("Since we already have a running ES instance we should not index the same documents again."
-                   "If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset.json')' manually ")
+                   " If you still want to do this call: 'document_store.add_eval_data('../data/nq/nq_dev_subset_v2.json')' manually ")
 
 # Initialize Retriever
 retriever = ElasticsearchRetriever(document_store=document_store)
@@ -74,7 +74,7 @@
 if eval_reader_only:
     reader_eval_results = reader.eval(document_store=document_store, device=device)
     # Evaluation of Reader can also be done directly on a SQuAD-formatted file without passing the data to Elasticsearch
-    #reader_eval_results = reader.eval_on_file("../data/natural_questions", "dev_subset.json", device=device)
+    #reader_eval_results = reader.eval_on_file("../data/nq", "nq_dev_subset_v2.json", device=device)
 
     ## Reader Top-N-Accuracy is the proportion of predicted answers that match with their corresponding correct answer
     print("Reader Top-N-Accuracy:", reader_eval_results["top_n_accuracy"])