Checklist tests (#255)

* checklist tests
* Update CHANGELOG.md
allenai · Apr 25, 2021 · ab1e86a · ab1e86a
1 parent 659c71f
commit ab1e86a
Show file tree

Hide file tree

Showing 5 changed files with 65 additions and 0 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## Unreleased
+
+### Added
+
+- Added tests for the checklist suites for SQuAD-style reading comprehension models (`bidaf`) and textual entailment models (`decomposable_attention` and `esim`).
+
 
 ## [v2.4.0](https://github.com/allenai/allennlp-models/releases/tag/v2.4.0) - 2021-04-22
 

diff --git a/tests/pair_classification/task_checklists/__init__.py b/tests/pair_classification/task_checklists/__init__.py
diff --git a/tests/pair_classification/task_checklists/textual_entailment_suite_test.py b/tests/pair_classification/task_checklists/textual_entailment_suite_test.py
@@ -0,0 +1,32 @@
+import pytest
+from allennlp.sanity_checks.task_checklists.textual_entailment_suite import TextualEntailmentSuite
+from allennlp.common.testing import AllenNlpTestCase
+from allennlp.models.archival import load_archive
+from allennlp.predictors import Predictor
+
+from allennlp_models.pair_classification.predictors import *  # noqa: F403
+from tests import FIXTURES_ROOT
+
+
+class TestTextualEntailmentSuite(AllenNlpTestCase):
+    """
+    Smoke test that runs `TextualEntailmentSuite` against predictors built from
+    the archived fixture models under `FIXTURES_ROOT / "pair_classification"`.
+    """
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "decomposable_attention",
+            "esim",
+        ],
+    )
+    def test_run(self, model: str):
+        """
+        Load the fixture archive for `model`, wrap it in a predictor, and run the
+        suite on it.
+
+        `model` is the name of the fixture directory under `pair_classification`
+        that holds `serialization/model.tar.gz`.
+
+        The test makes no assertions of its own: it passes as long as loading the
+        archive, building the suite, and running it do not raise.
+        """
+
+        archive = load_archive(
+            FIXTURES_ROOT / "pair_classification" / model / "serialization" / "model.tar.gz"
+        )
+        predictor = Predictor.from_archive(archive)
+
+        # Two sentence pairs handed to the suite as `data`.
+        # NOTE(review): presumably (premise, hypothesis) pairs -- confirm against
+        # `TextualEntailmentSuite`.
+        data = [
+            ("Alice and Bob are friends.", "Alice is Bob's friend."),
+            ("The park had children playing", "The park was empty."),
+        ]
+
+        # NOTE(review): `probs_key` presumably names the predictor output field that
+        # holds the label probabilities, and `max_examples` presumably caps how many
+        # examples each test runs -- confirm against the suite's documentation.
+        suite = TextualEntailmentSuite(probs_key="label_probs", add_default_tests=True, data=data)
+        suite.run(predictor, max_examples=10)
diff --git a/tests/rc/task_checklists/__init__.py b/tests/rc/task_checklists/__init__.py
diff --git a/tests/rc/task_checklists/question_answering_suite_test.py b/tests/rc/task_checklists/question_answering_suite_test.py
@@ -0,0 +1,27 @@
+import pytest
+from allennlp.sanity_checks.task_checklists.question_answering_suite import QuestionAnsweringSuite
+from allennlp.common.testing import AllenNlpTestCase
+from allennlp.models.archival import load_archive
+from allennlp.predictors import Predictor
+
+from allennlp_models.rc.predictors import *  # noqa: F403
+from tests import FIXTURES_ROOT
+
+
+class TestQuestionAnsweringSuite(AllenNlpTestCase):
+    """
+    Smoke test that runs `QuestionAnsweringSuite` against a predictor built from
+    the archived fixture model under `FIXTURES_ROOT / "rc"`.
+    """
+
+    @pytest.mark.parametrize(
+        "model",
+        [
+            "bidaf",
+        ],
+    )
+    def test_run(self, model: str):
+        """
+        Load the fixture archive for `model`, wrap it in a predictor, and run the
+        suite on it.
+
+        `model` is the name of the fixture directory under `rc` that holds
+        `serialization/model.tar.gz`.
+
+        The test makes no assertions of its own: it passes as long as loading the
+        archive, building the suite, and running it do not raise.
+        """
+        archive = load_archive(FIXTURES_ROOT / "rc" / model / "serialization" / "model.tar.gz")
+        predictor = Predictor.from_archive(archive)
+
+        # Two string pairs handed to the suite as `data`.
+        # NOTE(review): presumably (context passage, question) pairs -- confirm
+        # against `QuestionAnsweringSuite`.
+        data = [
+            ("Alice is taller than Bob.", "Who is taller?"),
+            ("Children were playing in the park.", "Was the park empty?"),
+        ]
+        # NOTE(review): `context_key` presumably names the predictor input field that
+        # carries the context, and `max_examples` presumably caps how many examples
+        # each test runs -- confirm against the suite's documentation.
+        suite = QuestionAnsweringSuite(context_key="passage", add_default_tests=True, data=data)
+        suite.run(predictor, max_examples=10)