
Add testing for FEAT functions #44

Merged · 15 commits · Dec 3, 2021
24 changes: 24 additions & 0 deletions .github/workflows/run-pytest-FEAT.yml
@@ -0,0 +1,24 @@
name: Run pytest for FEAT tests on each push

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: [3.8, 3.9]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install package locally
        run: |
          python -m pip install --upgrade pip
          pip install .
      - name: Test with pytest
        run: |
          pip install pytest
          pytest verifyml/model_tests/FEAT/tests/
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
  - repo: https://github.com/psf/black
-    rev: stable
+    rev: 21.11b1
    hooks:
      - id: black
        language_version: python3
3,071 changes: 3,071 additions & 0 deletions examples/model_card_output/model_cards/loan_approval_example2.html

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion pyproject.toml
Expand Up @@ -3,4 +3,9 @@ requires = [
"setuptools>=42",
"wheel"
]
build-backend = "setuptools.build_meta"
build-backend = "setuptools.build_meta"

[tool.pytest.ini_options]
filterwarnings = [
'ignore:IPython could not be loaded!'
]
8 changes: 5 additions & 3 deletions verifyml/model_tests/FEAT/DataShift.py
@@ -200,9 +200,11 @@ def plot(self, alpha: float = 0.05, save_plots: bool = True):
                    lambda x: z_value * (x * (1 - x) / self.df_size[1]) ** 0.5
                )
            )

-            df_plot.plot.bar(yerr=[train_ci, eval_ci], rot=0, ax=axs[num], title=pa)
-            num += 1
+            if len(self.protected_attr) > 1:
+                df_plot.plot.bar(yerr=[train_ci, eval_ci], rot=0, ax=axs[num], title=pa)
+                num += 1
+            else:
+                df_plot.plot.bar(yerr=[train_ci, eval_ci], rot=0, ax=axs, title=pa)

        title = "Probability Distribution of protected attributes"
        fig.suptitle(title)
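The new branch handles the single-attribute case: matplotlib's `plt.subplots` returns a lone `Axes` object when only one subplot is created, so indexing `axs[num]` would fail. A minimal standalone sketch (not part of the PR) showing the difference:

```python
import numpy as np
import matplotlib.pyplot as plt

# A single subplot: `axs` is one Axes object, so axs[0] would fail.
fig, axs = plt.subplots(1, 1)
print(isinstance(axs, np.ndarray))  # False

# Two or more subplots: `axs` is a numpy array of Axes and can be indexed.
fig, axs = plt.subplots(1, 2)
print(isinstance(axs, np.ndarray), axs.shape)  # True (2,)
```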
13 changes: 6 additions & 7 deletions verifyml/model_tests/FEAT/MinMaxMetricThreshold.py
@@ -57,14 +57,13 @@ class MinMaxMetricThreshold(ModelTest):
    test_desc: str = None

    def __post_init__(self):
-        lower_req_metrics = {"fpr", "fnr", "mse", "mae"}
-        higher_req_metrics = {"tpr", "tnr"}
+        lower_req = {"fpr", "fnr", "mse", "mae"}
+        higher_req = {"tpr", "tnr"}

-        if self.metric not in lower_req_metrics | higher_req_metrics:
-            raise ValueError(f"metric should be one of {metrics}.")
+        if self.metric not in lower_req | higher_req:
+            raise ValueError(f"metric should be one of {lower_req | higher_req}.")

-        req = "lower" if self.metric in lower_req_metrics else "higher"
+        req = "lower" if self.metric in lower_req else "higher"

        default_test_desc = inspect.cleandoc(
            f"""
@@ -108,7 +107,7 @@ def get_result_regression(self, df_test_with_output: pd.DataFrame) -> pd.DataFrame:
            )
            self.dof_list.append(len(output_sub) - 1)

-        result = pd.DataFrame.from_dict(result, orient="index", columns=[self.metric],)
+        result = pd.DataFrame.from_dict(result, orient="index", columns=[self.metric])

        result["passed"] = result.iloc[:, 0].apply(lambda x: x < self.threshold)
        result = result.round(3)
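For reference, the renamed sets and the corrected error message (which previously referenced an undefined `metrics` name) behave as in this standalone sketch of the same validation pattern; `validate_metric` is a hypothetical helper, not part of the library:

```python
def validate_metric(metric: str) -> str:
    """Return whether the metric must stay below ("lower") or above ("higher") the threshold."""
    lower_req = {"fpr", "fnr", "mse", "mae"}  # passing requires a lower value
    higher_req = {"tpr", "tnr"}               # passing requires a higher value

    if metric not in lower_req | higher_req:
        # Build the message from the set union instead of an undefined `metrics` name
        raise ValueError(f"metric should be one of {lower_req | higher_req}.")

    return "lower" if metric in lower_req else "higher"


print(validate_metric("fpr"))  # lower
print(validate_metric("tpr"))  # higher
```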
1 change: 0 additions & 1 deletion verifyml/model_tests/FEAT/Perturbation.py
@@ -19,7 +19,6 @@
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
-import numpy as np
from sklearn.metrics import confusion_matrix, mean_squared_error, mean_absolute_error
from sklearn.base import is_classifier
from scipy.stats import norm, chi2
3 changes: 1 addition & 2 deletions verifyml/model_tests/FEAT/SubgroupDisparity.py
@@ -233,11 +233,10 @@ def get_result(self, df_test_with_output: pd.DataFrame) -> Dict[str, float]:
f"Classification metrics is not applicable with regression problem. Try metric = 'mse' "
)

self.metric_dict, self.size_list = self.get_metric_dict(df_test_with_output)
if self.method == "ratio":
self.metric_dict, self.size_list = self.get_metric_dict(df_test_with_output)
result = max(self.metric_dict.values()) / min(self.metric_dict.values())
elif self.method == "diff":
self.metric_dict, self.size_list = self.get_metric_dict(df_test_with_output)
result = max(self.metric_dict.values()) - min(self.metric_dict.values())
elif self.method == "chi2":
if self.metric in ["mse", "mae"]:
28 changes: 28 additions & 0 deletions verifyml/model_tests/FEAT/tests/README.md
@@ -0,0 +1,28 @@
# Tests

These files test the functionality of the FEAT test classes using `pytest`.

## Installation

```bash
pip install -U pytest
```

## Running

Run this command while in this directory:

```bash
# -v: verbose
pytest -v
```

Otherwise, specify the path to this test directory:

```bash
pytest <PATH TO THIS TEST DIR>/ -v
```

## Configuration

Use the `[tool.pytest.ini_options]` section of VerifyML's [`pyproject.toml` file](https://github.com/cylynx/verifyml/blob/main/pyproject.toml) to configure `pytest`.
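
The suite can also be invoked from Python instead of the shell; a minimal sketch using `pytest.main` (the path below assumes you are running from the repository root):

```python
import sys

import pytest

# Equivalent to `pytest -v verifyml/model_tests/FEAT/tests/` on the command line;
# pytest.main returns an exit code (0 means all tests passed).
exit_code = pytest.main(["-v", "verifyml/model_tests/FEAT/tests/"])
sys.exit(exit_code)
```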
121 changes: 121 additions & 0 deletions verifyml/model_tests/FEAT/tests/test_DataShift.py
@@ -0,0 +1,121 @@
# Test cases for the DataShift FEAT test

from ..DataShift import DataShift

import inspect
import pandas as pd

# Sample test case data
x_train_data = pd.DataFrame(
    {"gender": ["M", "M", "M", "M", "M", "M", "F", "F", "F", "F"]}
)
x_test_data = pd.DataFrame(
    {"gender": ["M", "M", "M", "M", "M", "F", "F", "F", "F", "F"]}
)


def test_plot_defaults():
    """Test that the default arguments of the plot() method are as expected."""

    sig = inspect.signature(DataShift.plot)

    assert sig.parameters["alpha"].default == 0.05
    assert sig.parameters["save_plots"].default == True


def test_save_plots_true():
    """Test that the plot is saved to the test object when .plot(save_plots=True)."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="ratio", threshold=1.5)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    # plot it
    data_test.plot(save_plots=True)

    # the test object's plots attribute should be a dict of length 1
    assert len(data_test.plots) == 1

    # the plots dict should have the plot title as key, and the value should be a string
    assert isinstance(
        data_test.plots["Probability Distribution of protected attributes"], str
    )


def test_save_plots_false():
    """Test that the plot is not saved to the test object when .plot(save_plots=False)."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="ratio", threshold=1.5)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    # plot it
    data_test.plot(save_plots=False)

    # nothing should be saved
    assert len(data_test.plots) == 0


def test_run_ratio():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="ratio", threshold=1.23)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    assert data_test.result.loc["gender_F"]["training_distribution"] == 0.4
    assert data_test.result.loc["gender_F"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_F"]["ratio"] == 1.25
    assert data_test.result.loc["gender_F"]["passed"] == False

    assert data_test.result.loc["gender_M"]["training_distribution"] == 0.6
    assert data_test.result.loc["gender_M"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_M"]["ratio"] == 1.2
    assert data_test.result.loc["gender_M"]["passed"] == True

    assert data_test.passed == False


def test_run_difference():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="diff", threshold=0.1)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    assert data_test.result.loc["gender_F"]["training_distribution"] == 0.4
    assert data_test.result.loc["gender_F"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_F"]["difference"] == 0.1
    assert data_test.result.loc["gender_F"]["passed"] == True

    assert data_test.result.loc["gender_M"]["training_distribution"] == 0.6
    assert data_test.result.loc["gender_M"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_M"]["difference"] == 0.1
    assert data_test.result.loc["gender_M"]["passed"] == True

    assert data_test.passed == True


def test_run_chi2():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    data_test = DataShift(protected_attr=["gender"], method="chi2", threshold=1.1)

    # run test
    data_test.run(x_train=x_train_data, x_test=x_test_data)

    assert data_test.result.loc["gender_F"]["training_distribution"] == 0.4
    assert data_test.result.loc["gender_F"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_F"]["p-value"] == 1
    assert data_test.result.loc["gender_F"]["passed"] == False

    assert data_test.result.loc["gender_M"]["training_distribution"] == 0.6
    assert data_test.result.loc["gender_M"]["eval_distribution"] == 0.5
    assert data_test.result.loc["gender_M"]["p-value"] == 1
    assert data_test.result.loc["gender_M"]["passed"] == False

    assert data_test.passed == False
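
The expected values asserted above follow directly from the sample data: males make up 6/10 of the training set and 5/10 of the test set, females 4/10 and 5/10. A quick standalone check of the ratio and difference figures (mirroring the fixtures, not the library's internals):

```python
# Distributions taken from the sample fixtures at the top of this file
train = {"M": 6 / 10, "F": 4 / 10}  # training_distribution
eval_ = {"M": 5 / 10, "F": 5 / 10}  # eval_distribution

for group in ("M", "F"):
    ratio = max(train[group], eval_[group]) / min(train[group], eval_[group])
    diff = abs(train[group] - eval_[group])
    print(group, round(ratio, 2), round(diff, 2))

# M 1.2 0.1  -> within the 1.23 ratio threshold and the 0.1 diff threshold
# F 1.25 0.1 -> exceeds the 1.23 ratio threshold, so the overall ratio test fails
```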
79 changes: 79 additions & 0 deletions verifyml/model_tests/FEAT/tests/test_FeatureImportance.py
@@ -0,0 +1,79 @@
# Test cases for the FeatureImportance FEAT test

from ..FeatureImportance import FeatureImportance

import inspect
import pandas as pd

# Sample test case data
test_data = pd.DataFrame(
    {
        "features": [
            "income",
            "gender_M",
            "gender_F",
            "married_No",
            "amt",
            "age",
            "married_Yes",
        ],
        "value": [0.7, 0.4, 0.2, 0.1, 0.6, 0.5, 0.3],
    }
)


def test_plot_defaults():
    """Test that the default arguments of the plot() method are as expected."""

    sig = inspect.signature(FeatureImportance.plot)

    assert sig.parameters["show_n"].default == 10
    assert sig.parameters["save_plots"].default == True


def test_save_plots_true():
    """Test that the plot is saved to the test object when .plot(save_plots=True)."""
    # init test object
    imp_test = FeatureImportance(attrs=["gender", "married"], threshold=4)

    # plot it
    imp_test.plot(test_data, save_plots=True)

    # the test object's plots attribute should be a dict of length 1
    assert len(imp_test.plots) == 1

    # the plots dict should have the plot title as key, and the value should be a string
    assert isinstance(imp_test.plots["Feature Importance Plot"], str)


def test_save_plots_false():
    """Test that the plot is not saved to the test object when .plot(save_plots=False)."""
    # init test object
    imp_test = FeatureImportance(attrs=["gender", "married"], threshold=4)

    # plot it
    imp_test.plot(test_data, save_plots=False)

    # nothing should be saved
    assert len(imp_test.plots) == 0


def test_run():
    """Test that calling .run() updates the test object's .result and .passed attributes."""
    # init test object
    imp_test = FeatureImportance(attrs=["gender", "married"], threshold=4)

    # run test
    imp_test.run(test_data)

    assert imp_test.result.loc["gender_M"].feature_rank == 4
    assert imp_test.result.loc["gender_F"].feature_rank == 6
    assert imp_test.result.loc["married_Yes"].feature_rank == 5
    assert imp_test.result.loc["married_No"].feature_rank == 7

    assert imp_test.result.loc["gender_M"].passed == False
    assert imp_test.result.loc["gender_F"].passed == True
    assert imp_test.result.loc["married_Yes"].passed == True
    assert imp_test.result.loc["married_No"].passed == True

    assert imp_test.passed == False
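
The expected ranks in `test_run` come from ordering the sample importance values in descending order; a quick standalone check with pandas (mirroring the fixture above, not the library's internals):

```python
import pandas as pd

values = pd.Series(
    [0.7, 0.4, 0.2, 0.1, 0.6, 0.5, 0.3],
    index=["income", "gender_M", "gender_F", "married_No", "amt", "age", "married_Yes"],
)

# Rank 1 = most important feature (largest importance value)
ranks = values.rank(ascending=False).astype(int)
print(ranks[["gender_M", "gender_F", "married_Yes", "married_No"]].to_dict())
# {'gender_M': 4, 'gender_F': 6, 'married_Yes': 5, 'married_No': 7}
# With threshold=4, gender_M falls inside the top 4 features, so that row is expected to fail.
```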