
Add testing for FEAT functions #44

Merged · 15 commits · Dec 3, 2021
24 changes: 24 additions & 0 deletions .github/workflows/run-pytest-FEAT.yml
@@ -0,0 +1,24 @@
name: Run pytest for FEAT tests on each push

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.8, 3.9]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install package locally
        run: |
          python -m pip install --upgrade pip
          pip install .
      - name: Test with pytest
        run: |
          pip install pytest
          pytest verifyml/model_tests/FEAT/tests/
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
  - repo: https://github.com/psf/black
-    rev: stable
+    rev: 21.11b1
    hooks:
      - id: black
        language_version: python3
3,071 changes: 3,071 additions & 0 deletions examples/model_card_output/model_cards/loan_approval_example2.html

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion pyproject.toml
Expand Up @@ -3,4 +3,9 @@ requires = [
"setuptools>=42",
"wheel"
]
build-backend = "setuptools.build_meta"
build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
filterwarnings = [
'ignore:IPython could not be loaded!'
]
8 changes: 5 additions & 3 deletions verifyml/model_tests/FEAT/DataShift.py
@@ -200,9 +200,11 @@ def plot(self, alpha: float = 0.05, save_plots: bool = True):
                    lambda x: z_value * (x * (1 - x) / self.df_size[1]) ** 0.5
                )
            )

-            df_plot.plot.bar(yerr=[train_ci, eval_ci], rot=0, ax=axs[num], title=pa)
-            num += 1
+            if len(self.protected_attr) > 1:
+                df_plot.plot.bar(yerr=[train_ci, eval_ci], rot=0, ax=axs[num], title=pa)
+                num += 1
+            else:
+                df_plot.plot.bar(yerr=[train_ci, eval_ci], rot=0, ax=axs, title=pa)

        title = "Probability Distribution of protected attributes"
        fig.suptitle(title)
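The new branch handles the single-attribute case: matplotlib's `plt.subplots` returns a lone `Axes` object when only one subplot is created, so indexing `axs[num]` would fail. A minimal standalone sketch (not part of the PR) showing the difference:

```python
import numpy as np
import matplotlib.pyplot as plt

# A single subplot: `axs` is one Axes object, so axs[0] would fail.
fig, axs = plt.subplots(1, 1)
print(isinstance(axs, np.ndarray))  # False

# Two or more subplots: `axs` is a numpy array of Axes and can be indexed.
fig, axs = plt.subplots(1, 2)
print(isinstance(axs, np.ndarray), axs.shape)  # True (2,)
```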
13 changes: 6 additions & 7 deletions verifyml/model_tests/FEAT/MinMaxMetricThreshold.py
@@ -57,14 +57,13 @@ class MinMaxMetricThreshold(ModelTest):
    test_desc: str = None

    def __post_init__(self):
-        lower_req_metrics = {"fpr", "fnr", "mse", "mae"}
-        higher_req_metrics = {"tpr", "tnr"}
+        lower_req = {"fpr", "fnr", "mse", "mae"}
+        higher_req = {"tpr", "tnr"}

-        if self.metric not in lower_req_metrics | higher_req_metrics:
-            raise ValueError(f"metric should be one of {metrics}.")
+        if self.metric not in lower_req | higher_req:
+            raise ValueError(f"metric should be one of {lower_req | higher_req}.")

-        req = "lower" if self.metric in lower_req_metrics else "higher"
+        req = "lower" if self.metric in lower_req else "higher"

        default_test_desc = inspect.cleandoc(
            f"""
@@ -108,7 +107,7 @@ def get_result_regression(self, df_test_with_output: pd.DataFrame) -> pd.DataFrame:
            )
            self.dof_list.append(len(output_sub) - 1)

-        result = pd.DataFrame.from_dict(result, orient="index", columns=[self.metric],)
+        result = pd.DataFrame.from_dict(result, orient="index", columns=[self.metric])

        result["passed"] = result.iloc[:, 0].apply(lambda x: x < self.threshold)
        result = result.round(3)
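For reference, the renamed sets and the corrected error message (which previously referenced an undefined `metrics` name) behave as in this standalone sketch of the same validation pattern; `validate_metric` is a hypothetical helper, not part of the library:

```python
def validate_metric(metric: str) -> str:
    """Return whether the metric must stay below ("lower") or above ("higher") the threshold."""
    lower_req = {"fpr", "fnr", "mse", "mae"}  # passing requires a lower value
    higher_req = {"tpr", "tnr"}               # passing requires a higher value

    if metric not in lower_req | higher_req:
        # Build the message from the set union instead of an undefined `metrics` name
        raise ValueError(f"metric should be one of {lower_req | higher_req}.")

    return "lower" if metric in lower_req else "higher"


print(validate_metric("fpr"))  # lower
print(validate_metric("tpr"))  # higher
```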
1 change: 0 additions & 1 deletion verifyml/model_tests/FEAT/Perturbation.py
@@ -19,7 +19,6 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
-import numpy as np
from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error
from sklearn.base import is_classifier
from scipy.stats import norm, chi2
3 changes: 1 addition & 2 deletions verifyml/model_tests/FEAT/SubgroupDisparity.py
@@ -233,11 +233,10 @@ def get_result(self, df_test_with_output: pd.DataFrame) -> Dict[str, float]:
f"Classification metrics is not applicable with regression problem. Try metric = 'mse' "
)

self.metric_dict, self.size_list = self.get_metric_dict(df_test_with_output)
if self.method == "ratio":
self.metric_dict, self.size_list = self.get_metric_dict(df_test_with_output)
result = max(self.metric_dict.values()) / min(self.metric_dict.values())
elif self.method == "diff":
self.metric_dict, self.size_list = self.get_metric_dict(df_test_with_output)
result = max(self.metric_dict.values()) - min(self.metric_dict.values())
elif self.method == "chi2":
if self.metric in ["mse", "mae"]:
28 changes: 28 additions & 0 deletions verifyml/model_tests/FEAT/tests/README.md
@@ -0,0 +1,28 @@
# Tests

These files test the functionality of the FEAT test classes using `pytest`.

## Installation

```bash
pip install -U pytest
```

## Running

Run this command while in this directory:

```bash
# -v: verbose
pytest -v
```

Otherwise, specify the path to this test directory:

```bash
pytest <PATH TO THIS TEST DIR>/ -v
```

## Configuration

Use the `[tool.pytest.ini_options]` section of VerifyML's [`pyproject.toml` file](https://github.com/cylynx/verifyml/blob/main/pyproject.toml) to configure `pytest`.
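
The suite can also be invoked from Python instead of the shell; a minimal sketch using `pytest.main` (the path below assumes you are running from the repository root):

```python
import sys

import pytest

# Equivalent to `pytest -v verifyml/model_tests/FEAT/tests/` on the command line;
# pytest.main returns an exit code (0 means all tests passed).
exit_code = pytest.main(["-v", "verifyml/model_tests/FEAT/tests/"])
sys.exit(exit_code)
```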
121 changes: 121 additions & 0 deletions verifyml/model_tests/FEAT/tests/test_DataShift.py
@@ -0,0 +1,121 @@
# Test cases for the DataShift FEAT test

from ..DataShift import DataShift

import inspect
import pandas as pd

# Sample test case data
x_train_data = pd.DataFrame(
    {"gender": ["M", "M", "M", "M", "M", "M", "F", "F", "F", "F"]}
)
x_test_data = pd.DataFrame(
    {"gender": ["M", "M", "M", "M", "M", "F", "F", "F", "F", "F"]}
)


def test_plot_defaults():
    """Test that the default arguments of the plot() method are as expected."""

    sig = inspect.signature(DataShift.plot)

    assert sig.parameters["alpha"].default == 0.05
    assert sig.parameters["save_plots"].default == True


def test_save_plots_true():
    """Test that the plot is saved to the test object when .plot(save_plots=True)."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="ratio", threshold=1.5)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    # plot it
    data_test.plot(save_plots=True)

    # the test object's plots attribute should be a dict of length 1
    assert len(data_test.plots) == 1

    # the plots dict should have the plot title as key, and the value should be a string
    assert isinstance(
        data_test.plots["Probability Distribution of protected attributes"], str
    )


def test_save_plots_false():
    """Test that the plot is not saved to the test object when .plot(save_plots=False)."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="ratio", threshold=1.5)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    # plot it
    data_test.plot(save_plots=False)

    # nothing should be saved
    assert len(data_test.plots) == 0


def test_run_ratio():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="ratio", threshold=1.23)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    assert data_test.result.loc["gender_F"]["training_distribution"] == 0.4
    assert data_test.result.loc["gender_F"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_F"]["ratio"] == 1.25
    assert data_test.result.loc["gender_F"]["passed"] == False

    assert data_test.result.loc["gender_M"]["training_distribution"] == 0.6
    assert data_test.result.loc["gender_M"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_M"]["ratio"] == 1.2
    assert data_test.result.loc["gender_M"]["passed"] == True

    assert data_test.passed == False


def test_run_difference():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="diff", threshold=0.1)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    assert data_test.result.loc["gender_F"]["training_distribution"] == 0.4
    assert data_test.result.loc["gender_F"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_F"]["difference"] == 0.1
    assert data_test.result.loc["gender_F"]["passed"] == True

    assert data_test.result.loc["gender_M"]["training_distribution"] == 0.6
    assert data_test.result.loc["gender_M"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_M"]["difference"] == 0.1
    assert data_test.result.loc["gender_M"]["passed"] == True

    assert data_test.passed == True


def test_run_chi2():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="chi2", threshold=1.1)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    assert data_test.result.loc["gender_F"]["training_distribution"] == 0.4
    assert data_test.result.loc["gender_F"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_F"]["p-value"] == 1
    assert data_test.result.loc["gender_F"]["passed"] == False

    assert data_test.result.loc["gender_M"]["training_distribution"] == 0.6
    assert data_test.result.loc["gender_M"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_M"]["p-value"] == 1
    assert data_test.result.loc["gender_M"]["passed"] == False

    assert data_test.passed == False
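
The expected values asserted above follow directly from the sample data: males make up 6/10 of the training set and 5/10 of the test set, females 4/10 and 5/10. A quick standalone check of the ratio and difference figures (mirroring the fixtures, not the library's internals):

```python
# Distributions taken from the sample fixtures at the top of this file
train = {"M": 6 / 10, "F": 4 / 10}  # training_distribution
eval_ = {"M": 5 / 10, "F": 5 / 10}  # eval_distribution

for group in ("M", "F"):
    ratio = max(train[group], eval_[group]) / min(train[group], eval_[group])
    diff = abs(train[group] - eval_[group])
    print(group, round(ratio, 2), round(diff, 2))

# M 1.2 0.1  -> within the 1.23 ratio threshold and the 0.1 diff threshold
# F 1.25 0.1 -> exceeds the 1.23 ratio threshold, so the overall ratio test fails
```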
79 changes: 79 additions & 0 deletions verifyml/model_tests/FEAT/tests/test_FeatureImportance.py
@@ -0,0 +1,79 @@
# Test cases for the FeatureImportance FEAT test

from ..FeatureImportance import FeatureImportance

import inspect
import pandas as pd

# Sample test case data
test_data = pd.DataFrame(
    {
        "features": [
            "income",
            "gender_M",
            "gender_F",
            "married_No",
            "amt",
            "age",
            "married_Yes",
        ],
        "value": [0.7, 0.4, 0.2, 0.1, 0.6, 0.5, 0.3],
    }
)


def test_plot_defaults():
    """Test that the default arguments of the plot() method are as expected."""

    sig = inspect.signature(FeatureImportance.plot)

    assert sig.parameters["show_n"].default == 10
    assert sig.parameters["save_plots"].default == True


def test_save_plots_true():
    """Test that the plot is saved to the test object when .plot(save_plots=True)."""
    # init test object
    imp_test = FeatureImportance(attrs=["gender", "married"], threshold=4)

    # plot it
    imp_test.plot(test_data, save_plots=True)

    # the test object's plots attribute should be a dict of length 1
    assert len(imp_test.plots) == 1

    # the plots dict should have the plot title as key, and the value should be a string
    assert isinstance(imp_test.plots["Feature Importance Plot"], str)


def test_save_plots_false():
    """Test that the plot is not saved to the test object when .plot(save_plots=False)."""
    # init test object
    imp_test = FeatureImportance(attrs=["gender", "married"], threshold=4)

    # plot it
    imp_test.plot(test_data, save_plots=False)

    # nothing should be saved
    assert len(imp_test.plots) == 0


def test_run():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    imp_test = FeatureImportance(attrs=["gender", "married"], threshold=4)

    # run test
    imp_test.run(test_data)

    assert imp_test.result.loc["gender_M"].feature_rank == 4
    assert imp_test.result.loc["gender_F"].feature_rank == 6
    assert imp_test.result.loc["married_Yes"].feature_rank == 5
    assert imp_test.result.loc["married_No"].feature_rank == 7

    assert imp_test.result.loc["gender_M"].passed == False
    assert imp_test.result.loc["gender_F"].passed == True
    assert imp_test.result.loc["married_Yes"].passed == True
    assert imp_test.result.loc["married_No"].passed == True

    assert imp_test.passed == False
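
The expected ranks in `test_run` come from ordering the sample importance values in descending order; a quick standalone check with pandas (mirroring the fixture above, not the library's internals):

```python
import pandas as pd

values = pd.Series(
    [0.7, 0.4, 0.2, 0.1, 0.6, 0.5, 0.3],
    index=["income", "gender_M", "gender_F", "married_No", "amt", "age", "married_Yes"],
)

# Rank 1 = most important feature (largest importance value)
ranks = values.rank(ascending=False).astype(int)
print(ranks[["gender_M", "gender_F", "married_Yes", "married_No"]].to_dict())
# {'gender_M': 4, 'gender_F': 6, 'married_Yes': 5, 'married_No': 7}
# With threshold=4, gender_M falls inside the top 4 features, so that row is expected to fail.
```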