From 2514f0daa3def3b85ad68c486816e899032c59a0 Mon Sep 17 00:00:00 2001 From: Akshita Bhagia Date: Mon, 28 Feb 2022 11:16:13 -0800 Subject: [PATCH] Update CI to install allennlp with optional deps for tests (#323) * update ci to install allennlp with optional deps for tests * fix and black * move dep install to requirements.txt * fix conflicting dev requirements * special case in setup.py --- .github/workflows/ci.yml | 2 +- dev-requirements.txt | 9 +++-- requirements.txt | 2 +- setup.py | 4 +++ tests/rc/evaluations/drop_test.py | 22 +++++-------- tests/rc/evaluations/orb_test.py | 55 +++++++++++-------------------- 6 files changed, 38 insertions(+), 56 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7bb419d06..6b6cfdbd3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -533,7 +533,7 @@ jobs: - name: Patch AllenNLP dependency if: github.event_name == 'push' || github.event_name == 'pull_request' run: | - pip install --upgrade git+https://github.com/allenai/allennlp.git + pip install --upgrade "allennlp[all] @ git+https://github.com/allenai/allennlp.git@main" - name: Debug info run: | diff --git a/dev-requirements.txt b/dev-requirements.txt index 9066b3011..fb9494f80 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -34,12 +34,11 @@ flaky ruamel.yaml # Generating markdown files from Python modules. -git+https://github.com/NiklasRosenstein/pydoc-markdown.git@f0bf8af1db4f11581c19d206d4ed1ab34b4854c1 -nr.databind.core<0.0.17 -nr.interface<0.0.6 +pydoc-markdown>=4.0.0,<5.0.0 +databind.core -mkdocs==1.1.2 -mkdocs-material>=5.5.0,<8.2.0 +mkdocs==1.2.3 +mkdocs-material>=5.5.0,<8.3.0 markdown-include==0.6.0 #### PACKAGE-UPLOAD PACKAGES #### diff --git a/requirements.txt b/requirements.txt index 7b711665a..14c4bf1b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -git+https://github.com/allenai/allennlp.git@main +allennlp[all] @ git+https://github.com/allenai/allennlp.git@main torch>=1.7.0,<1.11.0 diff --git a/setup.py b/setup.py index 097aaf96a..e22d56c95 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,10 @@ def requirement_is_allennlp(req: str) -> bool: return True if re.match(r"^(git\+)?(https|ssh)://(git@)?github\.com/.*/allennlp\.git", req): return True + if re.match( + r"^allennlp\[.*\] @ (git\+)?(https|ssh)://(git@)?github\.com/.*/allennlp\.git", req + ): + return True return False def fix_url_dependencies(req: str) -> str: diff --git a/tests/rc/evaluations/drop_test.py b/tests/rc/evaluations/drop_test.py index 8fae68348..432004174 100644 --- a/tests/rc/evaluations/drop_test.py +++ b/tests/rc/evaluations/drop_test.py @@ -36,13 +36,10 @@ def test_metric_is_length_aware(self): predicted=["cat"], gold=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"] ) == (0.0, 0.17) # F1 score is mean([1.0, 0.5, 0.0, 0.0, 0.0, 0.0]) - assert ( - get_metrics( - predicted=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"], - gold=["cat", "cat dog"], - ) - == (0.0, 0.25) - ) + assert get_metrics( + predicted=["the", "fat", "cat", "the fat", "fat cat", "the fat cat"], + gold=["cat", "cat dog"], + ) == (0.0, 0.25) def test_articles_are_ignored(self): assert get_metrics(["td"], ["the td"]) == (1.0, 1.0) @@ -86,13 +83,10 @@ def test_multi_span_overlap_in_incorrect_cases(self): # only consider bags with matching numbers if they are present # F1 scores of: 1.0 2/3 0.0 0.0 0.0 0.0 # Average them to get F1 of 0.28 - assert ( - get_metrics( - ["78-yard", "56", "28", "40", "44", "touchdown"], - ["78-yard", "56 yard", "1 yard touchdown"], - ) - == (0.0, 0.28) - ) + assert get_metrics( + ["78-yard", "56", "28", "40", "44", "touchdown"], + ["78-yard", "56 yard", "1 yard touchdown"], + ) == (0.0, 0.28) # two copies of same value will account for only one match (using optimal 1-1 bag alignment) assert get_metrics(["23", "23 yard"], ["23-yard", "56 yards"]) == (0.0, 0.5) diff --git a/tests/rc/evaluations/orb_test.py b/tests/rc/evaluations/orb_test.py index 370f16754..f67038950 100644 --- a/tests/rc/evaluations/orb_test.py +++ b/tests/rc/evaluations/orb_test.py @@ -69,13 +69,10 @@ def test_bag_alignment_is_optimal(self): ) == (0.0, 0.4) def test_multiple_gold_spans(self): - assert ( - get_metric_drop( - ["Thomas David Arquette"], - [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]], - ) - == (0.0, 0.8) - ) + assert get_metric_drop( + ["Thomas David Arquette"], + [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]], + ) == (0.0, 0.8) def test_long_gold_spans(self): assert get_metric_drop( @@ -85,27 +82,18 @@ def test_long_gold_spans(self): class TestNarrativeQA: def test_ngrams(self): - assert ( - get_metric_narrativeqa( - "David Thomas was eating an apple", - ["David Thomas was eating an apple and fell to the ground"], - ) - == (0.43, 0.43, 0.57, 0.75, 1.0, 0.6) - ) - assert ( - get_metric_narrativeqa( - "David Thomas was eating an apple and fell to the ground", - ["David Thomas was eating an apple", "he fell to the ground"], - ) - == (0.55, 0.38, 0.92, 0.75, 0.6, 1.0) - ) - assert ( - get_metric_narrativeqa( - "David Thomas was eating an apple and fell to the ground", - ["David Thomas was eating an apple and fell to the ground"], - ) - == (1.0, 1.0, 1.0, 1.0, 1.0, 1.0) - ) + assert get_metric_narrativeqa( + "David Thomas was eating an apple", + ["David Thomas was eating an apple and fell to the ground"], + ) == (0.43, 0.43, 0.57, 0.75, 1.0, 0.6) + assert get_metric_narrativeqa( + "David Thomas was eating an apple and fell to the ground", + ["David Thomas was eating an apple", "he fell to the ground"], + ) == (0.55, 0.38, 0.92, 0.75, 0.6, 1.0) + assert get_metric_narrativeqa( + "David Thomas was eating an apple and fell to the ground", + ["David Thomas was eating an apple and fell to the ground"], + ) == (1.0, 1.0, 1.0, 1.0, 1.0, 1.0) class TestQuoREF: @@ -152,13 +140,10 @@ def test_bag_alignment_is_optimal(self): ) == (0.0, 0.4) def test_multiple_gold_spans(self): - assert ( - get_metric_drop( - ["Thomas David Arquette"], - [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]], - ) - == (0.0, 0.8) - ) + assert get_metric_drop( + ["Thomas David Arquette"], + [["David Thomas"], ["Thomas Jefferson"], ["David Thomas"], ["Thomas David"]], + ) == (0.0, 0.8) def test_long_gold_spans(self): assert get_metric_drop(