Skip to content

Commit

Permalink
Add fisher's exact (#373)
Browse files Browse the repository at this point in the history
* initial commit

* Add Fisher's exact test

* Replace MetricHtmlInfo by BaseWidgetInfo. Make id uuid by default.

* New data drift metrics (#339)

* rework data drift metrics

* fix format and imports

* fix notebooks

* add empty check after data clean for drift + some refactoring

* fix imports

* add threshold for DatasetDriftMetric
add tails in DatasetDriftMetric visual

* refactor data drift

* refactor data drift

* add tests for DatasetDriftMetric

* fix checks and titles for drift

* fix style

* update title in ColumnDriftMetric

* implement columns for DatasetDriftMetric and DataDriftTable

* fix data structure and json output for DataDriftTable

* fix data structure and json output for DatasetDriftMetric

* fix after main merge

* fix with black

* add reworked ColumnRegExpMetric (#348)

* add reworked ColumnRegExpMetric

* move ColumnRegExpMetric to a separate module, fix visual, add unittests

* fix table in html view, update an example

* fix ColumnRegExpMetric import in notebooks

* fix notebook imports

* add tabs for ColumnRegExpMetric

* fix after main merge

* fix after main merge

* fix imports with isort

* add anderson ksamp and its test

* fix doc

* fix description

* added hellinger_distance for drift detection

* isort

* Delete index.js.LICENSE.txt

* Delete index.js

* Added some examples of metrics and metric presets usage
Added some examples of tests and test presets usage
Removed outdated example with metrics

* move ColumnRegExpMetric data classes to the metric module (#360)

* fix warning about duplicated columns in data drift (#361)

* fix warning about duplicated columns in correlation calculation in data drift

* make a new list, do not modify num_feature_names

* Added the example of stattest specification for TestSuites

* Update readme.md

* Update readme.md

* add anderson example in notebook

* remove used features from wasserstein

* fix anderson not found

* check custom test

* Update all-tests.md

* Update run-tests.md

* Update run-tests.md

* Update README.md

* Add files via upload

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update examples.md

* Update examples.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* fix value error messages in data drift calculations (#367)

* fix value error messages in data drift calculations

* add error messages about missed column

* Update missing values metrics (#357)

* implement ColumnMissingValuesMetric and move DataIntegrityNullValuesMetrics to DatasetMissingValuesMetric

* fix isort and black

* fix notebook import and naming

* fix isort + black

* fix ColumnMissingValuesMetricRenderer and DatasetMissingValuesMetricRenderer

* add sort in ColumnMissingValuesMetric

* fix ColumnMissingValuesMetric view

* fix DatasetMissingValuesMetric view

* some rename null values -> missed values

* fix flake8

* add ColumnMissingValuesMetric unit tests

* move DatasetMissingValuesMetric to a separate module

* add test_dataset_missing_values_metrics_value_error

* fix number_of_rows_with_nulls

* fix labels texts

* update notebook example

* initial commit

* Add Fisher's exact test

* Update test_stattests.py

* fix lint,sort

* Fix contingency matrix boundary cases, and add tests

* fix conflicts

* fix fisher's exact test

* fix mypy

* fix black and remove checks

Co-authored-by: Mert Bozkır <mert.bozkirr@gmail.com>
Co-authored-by: Vyacheslav Morov <v.morov@corp.mail.ru>
Co-authored-by: Tapot <novakche@yandex.ru>
Co-authored-by: inderpreetsingh01 <inderpreetsinghchhabra23@gmail.com>
Co-authored-by: inderpreetsingh01 <54892545+inderpreetsingh01@users.noreply.github.com>
Co-authored-by: Emeli Dral <emeli.dral@gmail.com>
Co-authored-by: elenasamuylova <67064421+elenasamuylova@users.noreply.github.com>
  • Loading branch information
8 people committed Oct 26, 2022
1 parent 1535059 commit 10ea83c
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 2 deletions.
4 changes: 4 additions & 0 deletions docs/book/customization/options-for-statistical-tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ example_stat_test = StatTest(
- only for numerical features
- returns `p_value`
- drift detected when `p_value < threshold`
- `fisher_exact` - Fisher's Exact test
- only for categorical features
- returns `p_value`
- drift detected when `p_value < threshold`
- `cramer_von_mises` - Cramer-Von-Mises test
- only for numerical features
- returns `p_value`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"* 'psi' \n",
"* 'wasserstein'\n",
"* 'anderson'\n",
"* 'fisher'\n",
"* 'cramer_von_mises'\n",
"* 'g_test'\n",
"\n",
Expand Down Expand Up @@ -345,7 +346,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -359,7 +360,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.10.7 (tags/v3.10.7:6cc6b13, Sep 5 2022, 14:08:36) [MSC v.1933 64 bit (AMD64)]"
},
"vscode": {
"interpreter": {
"hash": "f1fdbb9839a2a71583b007f6f8ccc2efefb09edbe218b32fc0a8118d70971461"
}
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@
"* 'kl_div' \n",
"* 'psi' \n",
"* 'wasserstein'\n",
"* 'anderson'\n",
"* 'fisher'\n",
"\n",
"You can implement a custom drift test and use it in DataDriftOptions. Just define a function that takes two pd.Series (reference and current data) and returns a number (e.g. p_value or distance)\n",
"\n",
Expand Down
1 change: 1 addition & 0 deletions src/evidently/calculations/stattests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .anderson_darling_stattest import anderson_darling_test
from .chisquare_stattest import chi_stat_test
from .cramer_von_mises_stattest import cramer_von_mises
from .fisher_exact_stattest import fisher_exact_test
from .g_stattest import g_test
from .jensenshannon import jensenshannon_stat_test
from .kl_div import kl_div_stat_test
Expand Down
56 changes: 56 additions & 0 deletions src/evidently/calculations/stattests/fisher_exact_stattest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from typing import Tuple

import numpy as np
import pandas as pd
from scipy.stats import fisher_exact

from evidently.calculations.stattests.registry import StatTest
from evidently.calculations.stattests.registry import register_stattest

from .utils import generate_fisher2x2_contingency_table


def _fisher_exact_stattest(
    reference_data: pd.Series, current_data: pd.Series, feature_type: str, threshold: float
) -> Tuple[float, bool]:
    """Calculate the p-value of Fisher's exact test between two arrays.

    Args:
        reference_data: reference data
        current_data: current data
        feature_type: feature type (expected to be "cat"; the test is defined for binary categorical data)
        threshold: drift is detected when the two-tailed p-value is below this threshold
    Raises:
        ValueError: if null or inf values are found in either reference_data or current_data
        ValueError: if reference_data or current_data is not binary (number of unique values exceeds 2)
    Returns:
        p_value: two-tailed p-value of Fisher's exact test
        test_result: True if drift is detected (p_value < threshold), False otherwise
    """
    # Fisher's exact test is undefined for missing or non-finite observations,
    # so fail fast instead of silently dropping rows.
    has_invalid_values = (
        reference_data.isnull().values.any()
        or current_data.isnull().values.any()
        or reference_data.isin([np.inf, -np.inf]).any()
        or current_data.isin([np.inf, -np.inf]).any()
    )
    if has_invalid_values:
        raise ValueError(
            "Null or inf values found in either reference_data or current_data. Please ensure that no null or inf values are present"
        )

    # A 2x2 contingency table only makes sense for binary data.
    if (reference_data.nunique() > 2) or (current_data.nunique() > 2):
        raise ValueError("Expects binary data for both reference and current, but found unique categories > 2")

    contingency_matrix = generate_fisher2x2_contingency_table(reference_data, current_data)
    _, p_value = fisher_exact(contingency_matrix)
    return p_value, p_value < threshold


# Register Fisher's exact test in the stattest registry under the name
# "fisher_exact" so it can be selected via drift options. It is allowed for
# categorical (binary) features only; drift is reported when p_value < 0.1
# by default (see _fisher_exact_stattest).
fisher_exact_test = StatTest(
    name="fisher_exact",
    display_name="Fisher's Exact test",
    func=_fisher_exact_stattest,
    allowed_feature_types=["cat"],
    default_threshold=0.1,
)

register_stattest(fisher_exact_test)
37 changes: 37 additions & 0 deletions src/evidently/calculations/stattests/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from itertools import product

import numpy as np
import pandas as pd

Expand Down Expand Up @@ -39,3 +41,38 @@ def get_binned_data(
np.place(current_percents, current_percents == 0, 0.0001)

return reference_percents, current_percents


def generate_fisher2x2_contingency_table(reference_data: pd.Series, current_data: pd.Series) -> np.ndarray:
    """Build the 2x2 contingency table used by Fisher's exact test.

    Rows correspond to (current, reference) and columns to the counts of the
    two binary categories, ordered as (ones, zeros).

    Args:
        reference_data: reference data
        current_data: current data
    Raises:
        ValueError: if reference_data and current_data are not of equal length
    Returns:
        contingency_matrix: contingency matrix for binary data
    """
    if reference_data.shape[0] != current_data.shape[0]:
        raise ValueError(
            "reference_data and current_data are not of equal length, please ensure that they are of equal length"
        )

    categories = set(reference_data.unique().tolist() + current_data.unique().tolist())
    # With a single category present, pad with a dummy so that two codes exist.
    if len(categories) != 2:
        categories.add("placeholder")

    # Encode categories as integer codes; only the category encoded as 0 is
    # counted as "zero", every other code is counted as "one".
    encoding = {category: code for code, category in enumerate(categories)}

    ref_codes = reference_data.map(encoding).values
    cur_codes = current_data.map(encoding).values

    ones_ref = np.count_nonzero(ref_codes)
    zeros_ref = ref_codes.size - ones_ref

    ones_cur = np.count_nonzero(cur_codes)
    zeros_cur = cur_codes.size - ones_cur

    return np.array([[ones_cur, zeros_cur], [ones_ref, zeros_ref]])
33 changes: 33 additions & 0 deletions tests/calculations/stattests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import pytest

from evidently.calculations.stattests.utils import generate_fisher2x2_contingency_table
from evidently.calculations.stattests.utils import get_unique_not_nan_values_list_from_series


Expand All @@ -19,3 +20,35 @@ def test_get_unique_not_nan_values_list_from_series(current_data: pd.Series, ref
assert set(
get_unique_not_nan_values_list_from_series(current_data=current_data, reference_data=reference_data)
) == set(expected_list)


@pytest.mark.parametrize(
    "reference_data, current_data, expected_contingency_table",
    (
        (pd.Series([1, 0, 1, 0]), pd.Series([1, 0, 1, 0]), np.array([[2, 2], [2, 2]])),
        (pd.Series([1, 1, 1, 1]), pd.Series([0, 0, 0, 0]), np.array([[0, 4], [4, 0]])),
        (pd.Series([0, 0, 0, 0]), pd.Series([0, 0, 0, 0]), np.array([[0, 4], [0, 4]])),
        (pd.Series([1, 1, 1, 0]), pd.Series([0, 1, 1, 0]), np.array([[2, 2], [3, 1]])),
    ),
)
def test_generate_fisher2x2_contingency_table(
    reference_data: pd.Series, current_data: pd.Series, expected_contingency_table: np.ndarray
):
    """The contingency table matches the expected counts for each binary fixture."""
    assert (generate_fisher2x2_contingency_table(reference_data, current_data) == expected_contingency_table).all()


@pytest.mark.parametrize(
    "reference_data, current_data",
    (
        (pd.Series([1, 0, 1]), pd.Series([1, 0, 1, 0])),
        (pd.Series([1, 1, 1, 1]), pd.Series([0])),
    ),
)
def test_input_data_length_check_generate_fisher2x2_contingency_table(
    reference_data: pd.Series, current_data: pd.Series
):
    """Unequal-length inputs must raise a ValueError with the documented message."""
    with pytest.raises(
        ValueError,
        match="reference_data and current_data are not of equal length, please ensure that they are of equal length",
    ):
        # Pass arguments in the declared (reference, current) order; the
        # original accidentally swapped them (harmless only because the
        # length check is symmetric).
        generate_fisher2x2_contingency_table(reference_data, current_data)
101 changes: 101 additions & 0 deletions tests/stattests/test_stattests.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from evidently.calculations.stattests.anderson_darling_stattest import anderson_darling_test
from evidently.calculations.stattests.chisquare_stattest import chi_stat_test
from evidently.calculations.stattests.cramer_von_mises_stattest import cramer_von_mises
from evidently.calculations.stattests.fisher_exact_stattest import fisher_exact_test
from evidently.calculations.stattests.g_stattest import g_test
from evidently.calculations.stattests.hellinger_distance import hellinger_stat_test
from evidently.calculations.stattests.mann_whitney_urank_stattest import mann_whitney_u_stat_test
Expand Down Expand Up @@ -126,6 +127,106 @@ def test_hellinger_distance() -> None:
)


@pytest.mark.parametrize(
    "reference, current, threshold, expected_pvalue, drift_detected",
    [
        (
            pd.Series(["a", "b", "b", "a", "a", "b"] * 15),
            pd.Series(["b", "b", "a", "b", "b", "a"] * 15),
            0.1,
            approx(0.033, abs=1e-3),
            True,
        ),
        (
            pd.Series(["a", "b", "b", "a", "a", "b"]),
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            0.1,
            approx(0.181, abs=1e-3),
            False,
        ),
        (
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            0.1,
            approx(1.0, abs=1e-3),
            False,
        ),
        (
            pd.Series(["a", "b", "b", "b", "a", "b"]),
            pd.Series(["b", "b", "b", "a", "b", "a"]),
            0.1,
            approx(1.0, abs=1e-3),
            False,
        ),
        (
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            pd.Series(["b", "b", "b", "b", "b", "b"]),
            0.1,
            approx(0.0021, abs=1e-3),
            True,
        ),
        (
            pd.Series(["a", "a", "a", "b", "b"] * 30),
            pd.Series(["b", "b", "b", "a", "a"] * 30),
            0.1,
            approx(0.00078, abs=1e-3),
            True,
        ),
    ],
)
def test_pvalue_fisher_exact(
    reference: pd.Series, current: pd.Series, threshold: float, expected_pvalue: float, drift_detected: bool
) -> None:
    """Fisher's exact stattest returns the expected p-value and drift flag."""
    result = fisher_exact_test.func(reference, current, "cat", threshold)
    assert result == (approx(expected_pvalue, abs=1e-3), drift_detected)


@pytest.mark.parametrize(
    "reference, current",
    [
        (
            pd.Series(["a", np.nan, "b", "a", "a", "b"]),
            pd.Series(["b", "b", "a", "b", "b", "a"]),
        ),
        (
            pd.Series(["a", np.nan, "a", "a", "b"]),
            pd.Series(["a", "a", "a", "a", np.nan, "a"]),
        ),
        (
            pd.Series([np.inf, np.nan, np.nan, "a", "b", "a"]),
            pd.Series(["a", "a", np.inf, "a", "a", "b"]),
        ),
        (
            pd.Series([-np.inf, "b", np.nan, "b", "a", "b"]),
            pd.Series(["b", np.inf, "b", "a", "b", "a"]),
        ),
    ],
)
def test_for_null_fisher_exact(reference: pd.Series, current: pd.Series) -> None:
    """Null or inf values in either series must raise a ValueError."""
    expected_message = (
        "Null or inf values found in either reference_data or current_data. "
        "Please ensure that no null or inf values are present"
    )
    with pytest.raises(ValueError, match=expected_message):
        fisher_exact_test.func(reference, current, "cat", 0.1)


@pytest.mark.parametrize(
    "reference, current",
    (
        (
            pd.Series(["a", "c", "a", "a", "a", "b"]),
            pd.Series(["b", "b", "a", "b", "b", "a"]),
        ),
        (
            pd.Series(["a", 1, "a", 3, "b", "m"]),
            pd.Series(["a", "a", 2, "a", "b", "a"]),
        ),
    ),
)
def test_for_multiple_categories_fisher_exact(reference: pd.Series, current: pd.Series) -> None:
    """More than two unique categories in either series must raise a ValueError."""
    with pytest.raises(
        ValueError,
        match="Expects binary data for both reference and current, but found unique categories > 2",
    ):
        fisher_exact_test.func(reference, current, "cat", 0.1)


def test_mann_whitney() -> None:
reference = pd.Series([1, 2, 3, 4, 5, 6]).repeat([16, 18, 16, 14, 12, 12])
current = pd.Series([1, 2, 3, 4, 5, 6]).repeat([16, 16, 16, 16, 16, 8])
Expand Down

0 comments on commit 10ea83c

Please sign in to comment.