From 2514f0daa3def3b85ad68c486816e899032c59a0 Mon Sep 17 00:00:00 2001
From: Akshita Bhagia <akshita23bhagia@gmail.com>
Date: Mon, 28 Feb 2022 11:16:13 -0800
Subject: [PATCH] Update CI to install allennlp with optional deps for tests
 (#323)

* update ci to install allennlp with optional deps for tests

* fix and reformat with black

* move dep install to requirements.txt

* fix conflicting dev requirements

* special-case `allennlp[extras] @ <git url>` requirements in setup.py
---
 .github/workflows/ci.yml          |  2 +-
 dev-requirements.txt              |  9 +++--
 requirements.txt                  |  2 +-
 setup.py                          |  4 +++
 tests/rc/evaluations/drop_test.py | 22 +++++--------
 tests/rc/evaluations/orb_test.py  | 55 +++++++++++--------------------
 6 files changed, 38 insertions(+), 56 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7bb419d06..6b6cfdbd3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -533,7 +533,7 @@ jobs:
     - name: Patch AllenNLP dependency
       if: github.event_name == 'push' || github.event_name == 'pull_request'
       run: |
-        pip install --upgrade git+https://github.com/allenai/allennlp.git
+        pip install --upgrade "allennlp[all] @ git+https://github.com/allenai/allennlp.git@main"
 
     - name: Debug info
       run: |
diff --git a/dev-requirements.txt b/dev-requirements.txt
index 9066b3011..fb9494f80 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -34,12 +34,11 @@ flaky
 ruamel.yaml
 
 # Generating markdown files from Python modules.
-git+https://github.com/NiklasRosenstein/pydoc-markdown.git@f0bf8af1db4f11581c19d206d4ed1ab34b4854c1
-nr.databind.core<0.0.17
-nr.interface<0.0.6
+pydoc-markdown>=4.0.0,<5.0.0
+databind.core
 
-mkdocs==1.1.2
-mkdocs-material>=5.5.0,<8.2.0
+mkdocs==1.2.3
+mkdocs-material>=5.5.0,<8.3.0
 markdown-include==0.6.0
 
 #### PACKAGE-UPLOAD PACKAGES ####
diff --git a/requirements.txt b/requirements.txt
index 7b711665a..14c4bf1b4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-git+https://github.com/allenai/allennlp.git@main
+allennlp[all] @ git+https://github.com/allenai/allennlp.git@main
 
 torch>=1.7.0,<1.11.0
 
diff --git a/setup.py b/setup.py
index 097aaf96a..e22d56c95 100644
--- a/setup.py
+++ b/setup.py
@@ -32,6 +32,10 @@ def requirement_is_allennlp(req: str) -> bool:
             return True
         if re.match(r"^(git\+)?(https|ssh)://(git@)?github\.com/.*/allennlp\.git", req):
             return True
+        if re.match(
+            r"^allennlp\[.*\] @ (git\+)?(https|ssh)://(git@)?github\.com/.*/allennlp\.git", req
+        ):
+            return True
         return False
 
     def fix_url_dependencies(req: str) -> str:
diff --git a/tests/rc/evaluations/drop_test.py b/tests/rc/evaluations/drop_test.py
index 8fae68348..432004174 100644
--- a/tests/rc/evaluations/drop_test.py
+++ b/tests/rc/evaluations/drop_test.py
@@ -36,13 +36,10 @@ def test_metric_is_length_aware(self):
             predicted=["cat"], gold=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"]
         ) == (0.0, 0.17)
         # F1 score is mean([1.0, 0.5, 0.0, 0.0, 0.0, 0.0])
-        assert (
-            get_metrics(
-                predicted=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"],
-                gold=["cat", "cat dog"],
-            )
-            == (0.0, 0.25)
-        )
+        assert get_metrics(
+            predicted=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"],
+            gold=["cat", "cat dog"],
+        ) == (0.0, 0.25)
 
     def test_articles_are_ignored(self):
         assert get_metrics(["td"], ["the td"]) == (1.0, 1.0)
@@ -86,13 +83,10 @@ def test_multi_span_overlap_in_incorrect_cases(self):
         # only consider bags with matching numbers if they are present
         # F1 scores of:     1.0        2/3   0.0   0.0   0.0   0.0
         # Average them to get F1 of 0.28
-        assert (
-            get_metrics(
-                ["78-yard", "56", "28", "40", "44", "touchdown"],
-                ["78-yard", "56 yard", "1 yard touchdown"],
-            )
-            == (0.0, 0.28)
-        )
+        assert get_metrics(
+            ["78-yard", "56", "28", "40", "44", "touchdown"],
+            ["78-yard", "56 yard", "1 yard touchdown"],
+        ) == (0.0, 0.28)
 
         # two copies of same value will account for only one match (using optimal 1-1 bag alignment)
         assert get_metrics(["23", "23 yard"], ["23-yard", "56 yards"]) == (0.0, 0.5)
diff --git a/tests/rc/evaluations/orb_test.py b/tests/rc/evaluations/orb_test.py
index 370f16754..f67038950 100644
--- a/tests/rc/evaluations/orb_test.py
+++ b/tests/rc/evaluations/orb_test.py
@@ -69,13 +69,10 @@ def test_bag_alignment_is_optimal(self):
         ) == (0.0, 0.4)
 
     def test_multiple_gold_spans(self):
-        assert (
-            get_metric_drop(
-                ["Thomas David Arquette"],
-                [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]],
-            )
-            == (0.0, 0.8)
-        )
+        assert get_metric_drop(
+            ["Thomas David Arquette"],
+            [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]],
+        ) == (0.0, 0.8)
 
     def test_long_gold_spans(self):
         assert get_metric_drop(
@@ -85,27 +82,18 @@ def test_long_gold_spans(self):
 
 class TestNarrativeQA:
     def test_ngrams(self):
-        assert (
-            get_metric_narrativeqa(
-                "David Thomas was eating an apple",
-                ["David Thomas was eating an apple and fell to the ground"],
-            )
-            == (0.43, 0.43, 0.57, 0.75, 1.0, 0.6)
-        )
-        assert (
-            get_metric_narrativeqa(
-                "David Thomas was eating an apple and fell to the ground",
-                ["David Thomas was eating an apple", "he fell to the ground"],
-            )
-            == (0.55, 0.38, 0.92, 0.75, 0.6, 1.0)
-        )
-        assert (
-            get_metric_narrativeqa(
-                "David Thomas was eating an apple and fell to the ground",
-                ["David Thomas was eating an apple and fell to the ground"],
-            )
-            == (1.0, 1.0, 1.0, 1.0, 1.0, 1.0)
-        )
+        assert get_metric_narrativeqa(
+            "David Thomas was eating an apple",
+            ["David Thomas was eating an apple and fell to the ground"],
+        ) == (0.43, 0.43, 0.57, 0.75, 1.0, 0.6)
+        assert get_metric_narrativeqa(
+            "David Thomas was eating an apple and fell to the ground",
+            ["David Thomas was eating an apple", "he fell to the ground"],
+        ) == (0.55, 0.38, 0.92, 0.75, 0.6, 1.0)
+        assert get_metric_narrativeqa(
+            "David Thomas was eating an apple and fell to the ground",
+            ["David Thomas was eating an apple and fell to the ground"],
+        ) == (1.0, 1.0, 1.0, 1.0, 1.0, 1.0)
 
 
 class TestQuoREF:
@@ -152,13 +140,10 @@ def test_bag_alignment_is_optimal(self):
         ) == (0.0, 0.4)
 
     def test_multiple_gold_spans(self):
-        assert (
-            get_metric_drop(
-                ["Thomas David Arquette"],
-                [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]],
-            )
-            == (0.0, 0.8)
-        )
+        assert get_metric_drop(
+            ["Thomas David Arquette"],
+            [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]],
+        ) == (0.0, 0.8)
 
     def test_long_gold_spans(self):
         assert get_metric_drop(