Skip to content

Commit

Permalink
Add fisher's exact (#373)
Browse files Browse the repository at this point in the history
* initial commit

* Add Fisher's exact test

* Replace MetricHtmlInfo by BaseWidgetInfo. Make id uuid by default.

* New data drift metrics (#339)

* rework data drift metrics

* fix format and imports

* fix notebooks

* add empty check after data clean for drift + some refactoring

* fix imports

* add threshold for DatasetDriftMetric
add tails in DatasetDriftMetric visual

* refactor data drift

* refactor data drift

* add tests for DatasetDriftMetric

* fix checks and titles for drift

* fix style

* update title in ColumnDriftMetric

* implement columns for DatasetDriftMetric and DataDriftTable

* fix data structure and json output for DataDriftTable

* fix data structure and json output for DatasetDriftMetric

* fix after main merge

* fix with black

* add reworked ColumnRegExpMetric (#348)

* add reworked ColumnRegExpMetric

* move ColumnRegExpMetric to a separate module, fix visual, add unittests

* fix table in html view, update an example

* fix ColumnRegExpMetric import in notebooks

* fix notebook imports

* add tabs for ColumnRegExpMetric

* fix after main merge

* fix after main merge

* fix imports with isort

* add anderson ksamp and its test

* fix doc

* fix description

* added hellinger_distance for drift detection

* isort

* Delete index.js.LICENSE.txt

* Delete index.js

* Added some examples of metrics and metric presets usage
Added some examples of tests and test presets usage
Removed outdated example with metrics

* move ColumnRegExpMetric data classes to the metric module (#360)

* fix warning about duplicated columns in data drift (#361)

* fix warning about duplicated columns in correlation calculation in data drift

* make a new list, do not modify num_feature_names

* Added the example of stattest specification for TestSuites

* Update readme.md

* Update readme.md

* add anderson example in notebook

* remove used features from wasserstein

* fix anderson not found

* check custom test

* Update all-tests.md

* Update run-tests.md

* Update run-tests.md

* Update README.md

* Add files via upload

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update examples.md

* Update examples.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* Update README.md

* fix value error messages in data drift calculations (#367)

* fix value error messages in data drift calculations

* add error messages about missed column

* Update missing values metrics (#357)

* implement ColumnMissingValuesMetric and move DataIntegrityNullValuesMetrics to DatasetMissingValuesMetric

* fix isort and black

* fix notebook import and naming

* fix isort + black

* fix ColumnMissingValuesMetricRenderer and DatasetMissingValuesMetricRenderer

* add sort in ColumnMissingValuesMetric

* fix ColumnMissingValuesMetric view

* fix DatasetMissingValuesMetric view

* some rename null values -> missed values

* fix flake8

* add ColumnMissingValuesMetric unit tests

* move DatasetMissingValuesMetric to a separate module

* add test_dataset_missing_values_metrics_value_error

* fix number_of_rows_with_nulls

* fix labels texts

* update notebook example

* initial commit

* Add Fisher's exact test

* Update test_stattests.py

* fix lint,sort

* Fix contingency matrix boundary cases, and add tests

* fix conflicts

* fix fisher's exact test

* fix mypy

* fix black and remove checks

Co-authored-by: Mert Bozkır <mert.bozkirr@gmail.com>
Co-authored-by: Vyacheslav Morov <v.morov@corp.mail.ru>
Co-authored-by: Tapot <novakche@yandex.ru>
Co-authored-by: inderpreetsingh01 <inderpreetsinghchhabra23@gmail.com>
Co-authored-by: inderpreetsingh01 <54892545+inderpreetsingh01@users.noreply.github.com>
Co-authored-by: Emeli Dral <emeli.dral@gmail.com>
Co-authored-by: elenasamuylova <67064421+elenasamuylova@users.noreply.github.com>
  • Loading branch information
8 people committed Oct 26, 2022
1 parent 1535059 commit 10ea83c
Show file tree
Hide file tree
Showing 8 changed files with 242 additions and 2 deletions.
4 changes: 4 additions & 0 deletions docs/book/customization/options-for-statistical-tests.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ example_stat_test = StatTest(
- only for numerical features
- returns `p_value`
- drift detected when `p_value < threshold`
- `fisher_exact` - Fisher's Exact test
- only for categorical features
- returns `p_value`
- drift detected when `p_value < threshold`
- `cramer_von_mises` - Cramer-Von-Mises test
- only for numerical features
- returns `p_value`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@
"* 'psi' \n",
"* 'wasserstein'\n",
"* 'anderson'\n",
"* 'fisher'\n",
"* 'cramer_von_mises'\n",
"* 'g_test'\n",
"\n",
Expand Down Expand Up @@ -345,7 +346,7 @@
"provenance": []
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -359,7 +360,12 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
"version": "3.10.7 (tags/v3.10.7:6cc6b13, Sep 5 2022, 14:08:36) [MSC v.1933 64 bit (AMD64)]"
},
"vscode": {
"interpreter": {
"hash": "f1fdbb9839a2a71583b007f6f8ccc2efefb09edbe218b32fc0a8118d70971461"
}
}
},
"nbformat": 4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,8 @@
"* 'kl_div' \n",
"* 'psi' \n",
"* 'wasserstein'\n",
"* 'anderson'\n",
"* 'fisher'\n",
"\n",
"You can implement a custom drift test and use it in DataDriftOptions. Just define a function that takes two pd.Series (reference and current data) and returns a number (e.g. p_value or distance)\n",
"\n",
Expand Down
1 change: 1 addition & 0 deletions src/evidently/calculations/stattests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .anderson_darling_stattest import anderson_darling_test
from .chisquare_stattest import chi_stat_test
from .cramer_von_mises_stattest import cramer_von_mises
from .fisher_exact_stattest import fisher_exact_test
from .g_stattest import g_test
from .jensenshannon import jensenshannon_stat_test
from .kl_div import kl_div_stat_test
Expand Down
56 changes: 56 additions & 0 deletions src/evidently/calculations/stattests/fisher_exact_stattest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from typing import Tuple

import numpy as np
import pandas as pd
from scipy.stats import fisher_exact

from evidently.calculations.stattests.registry import StatTest
from evidently.calculations.stattests.registry import register_stattest

from .utils import generate_fisher2x2_contingency_table


def _fisher_exact_stattest(
    reference_data: pd.Series, current_data: pd.Series, feature_type: str, threshold: float
) -> Tuple[float, bool]:
    """Calculate the p-value of Fisher's exact test between two arrays.

    Args:
        reference_data: reference data
        current_data: current data
        feature_type: feature type (expected to be "cat"; the test is defined for binary categorical data)
        threshold: drift is detected when the two-tailed p-value is below this threshold
    Raises:
        ValueError: if null or inf values are found in either reference_data or current_data
        ValueError: if reference_data or current_data is not binary (number of unique values exceeds 2)
    Returns:
        p_value: two-tailed p-value of Fisher's exact test
        test_result: True if drift is detected (p_value < threshold), False otherwise
    """
    # Fisher's exact test is undefined for missing or non-finite observations,
    # so fail fast instead of silently dropping rows.
    has_invalid_values = (
        reference_data.isnull().values.any()
        or current_data.isnull().values.any()
        or reference_data.isin([np.inf, -np.inf]).any()
        or current_data.isin([np.inf, -np.inf]).any()
    )
    if has_invalid_values:
        raise ValueError(
            "Null or inf values found in either reference_data or current_data. Please ensure that no null or inf values are present"
        )

    # A 2x2 contingency table only makes sense for binary data.
    if (reference_data.nunique() > 2) or (current_data.nunique() > 2):
        raise ValueError("Expects binary data for both reference and current, but found unique categories > 2")

    contingency_matrix = generate_fisher2x2_contingency_table(reference_data, current_data)
    _, p_value = fisher_exact(contingency_matrix)
    return p_value, p_value < threshold


# Register Fisher's exact test in the stattest registry under the name
# "fisher_exact" so it can be selected via drift options. It is allowed for
# categorical (binary) features only; drift is reported when p_value < 0.1
# by default (see _fisher_exact_stattest).
fisher_exact_test = StatTest(
    name="fisher_exact",
    display_name="Fisher's Exact test",
    func=_fisher_exact_stattest,
    allowed_feature_types=["cat"],
    default_threshold=0.1,
)

register_stattest(fisher_exact_test)
37 changes: 37 additions & 0 deletions src/evidently/calculations/stattests/utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from itertools import product

import numpy as np
import pandas as pd

Expand Down Expand Up @@ -39,3 +41,38 @@ def get_binned_data(
np.place(current_percents, current_percents == 0, 0.0001)

return reference_percents, current_percents


def generate_fisher2x2_contingency_table(reference_data: pd.Series, current_data: pd.Series) -> np.ndarray:
    """Build the 2x2 contingency table used by Fisher's exact test.

    Rows correspond to (current, reference) and columns to the counts of the
    two binary categories, ordered as (ones, zeros).

    Args:
        reference_data: reference data
        current_data: current data
    Raises:
        ValueError: if reference_data and current_data are not of equal length
    Returns:
        contingency_matrix: contingency matrix for binary data
    """
    if reference_data.shape[0] != current_data.shape[0]:
        raise ValueError(
            "reference_data and current_data are not of equal length, please ensure that they are of equal length"
        )

    categories = set(reference_data.unique().tolist() + current_data.unique().tolist())
    # With a single category present, pad with a dummy so that two codes exist.
    if len(categories) != 2:
        categories.add("placeholder")

    # Encode categories as integer codes; only the category encoded as 0 is
    # counted as "zero", every other code is counted as "one".
    encoding = {category: code for code, category in enumerate(categories)}

    ref_codes = reference_data.map(encoding).values
    cur_codes = current_data.map(encoding).values

    ones_ref = np.count_nonzero(ref_codes)
    zeros_ref = ref_codes.size - ones_ref

    ones_cur = np.count_nonzero(cur_codes)
    zeros_cur = cur_codes.size - ones_cur

    return np.array([[ones_cur, zeros_cur], [ones_ref, zeros_ref]])
33 changes: 33 additions & 0 deletions tests/calculations/stattests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import pytest

from evidently.calculations.stattests.utils import generate_fisher2x2_contingency_table
from evidently.calculations.stattests.utils import get_unique_not_nan_values_list_from_series


Expand All @@ -19,3 +20,35 @@ def test_get_unique_not_nan_values_list_from_series(current_data: pd.Series, ref
assert set(
get_unique_not_nan_values_list_from_series(current_data=current_data, reference_data=reference_data)
) == set(expected_list)


@pytest.mark.parametrize(
    "reference_data, current_data, expected_contingency_table",
    (
        (pd.Series([1, 0, 1, 0]), pd.Series([1, 0, 1, 0]), np.array([[2, 2], [2, 2]])),
        (pd.Series([1, 1, 1, 1]), pd.Series([0, 0, 0, 0]), np.array([[0, 4], [4, 0]])),
        (pd.Series([0, 0, 0, 0]), pd.Series([0, 0, 0, 0]), np.array([[0, 4], [0, 4]])),
        (pd.Series([1, 1, 1, 0]), pd.Series([0, 1, 1, 0]), np.array([[2, 2], [3, 1]])),
    ),
)
def test_generate_fisher2x2_contingency_table(
    reference_data: pd.Series, current_data: pd.Series, expected_contingency_table: np.ndarray
):
    """The contingency table matches the expected counts for each binary fixture."""
    assert (generate_fisher2x2_contingency_table(reference_data, current_data) == expected_contingency_table).all()


@pytest.mark.parametrize(
    "reference_data, current_data",
    (
        (pd.Series([1, 0, 1]), pd.Series([1, 0, 1, 0])),
        (pd.Series([1, 1, 1, 1]), pd.Series([0])),
    ),
)
def test_input_data_length_check_generate_fisher2x2_contingency_table(
    reference_data: pd.Series, current_data: pd.Series
):
    """Unequal-length inputs must raise a ValueError with the documented message."""
    with pytest.raises(
        ValueError,
        match="reference_data and current_data are not of equal length, please ensure that they are of equal length",
    ):
        # Pass arguments in the declared (reference, current) order; the
        # original accidentally swapped them (harmless only because the
        # length check is symmetric).
        generate_fisher2x2_contingency_table(reference_data, current_data)
101 changes: 101 additions & 0 deletions tests/stattests/test_stattests.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from evidently.calculations.stattests.anderson_darling_stattest import anderson_darling_test
from evidently.calculations.stattests.chisquare_stattest import chi_stat_test
from evidently.calculations.stattests.cramer_von_mises_stattest import cramer_von_mises
from evidently.calculations.stattests.fisher_exact_stattest import fisher_exact_test
from evidently.calculations.stattests.g_stattest import g_test
from evidently.calculations.stattests.hellinger_distance import hellinger_stat_test
from evidently.calculations.stattests.mann_whitney_urank_stattest import mann_whitney_u_stat_test
Expand Down Expand Up @@ -126,6 +127,106 @@ def test_hellinger_distance() -> None:
)


@pytest.mark.parametrize(
    "reference, current, threshold, expected_pvalue, drift_detected",
    [
        (
            pd.Series(["a", "b", "b", "a", "a", "b"] * 15),
            pd.Series(["b", "b", "a", "b", "b", "a"] * 15),
            0.1,
            approx(0.033, abs=1e-3),
            True,
        ),
        (
            pd.Series(["a", "b", "b", "a", "a", "b"]),
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            0.1,
            approx(0.181, abs=1e-3),
            False,
        ),
        (
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            0.1,
            approx(1.0, abs=1e-3),
            False,
        ),
        (
            pd.Series(["a", "b", "b", "b", "a", "b"]),
            pd.Series(["b", "b", "b", "a", "b", "a"]),
            0.1,
            approx(1.0, abs=1e-3),
            False,
        ),
        (
            pd.Series(["a", "a", "a", "a", "a", "a"]),
            pd.Series(["b", "b", "b", "b", "b", "b"]),
            0.1,
            approx(0.0021, abs=1e-3),
            True,
        ),
        (
            pd.Series(["a", "a", "a", "b", "b"] * 30),
            pd.Series(["b", "b", "b", "a", "a"] * 30),
            0.1,
            approx(0.00078, abs=1e-3),
            True,
        ),
    ],
)
def test_pvalue_fisher_exact(
    reference: pd.Series, current: pd.Series, threshold: float, expected_pvalue: float, drift_detected: bool
) -> None:
    """Fisher's exact stattest returns the expected p-value and drift flag."""
    result = fisher_exact_test.func(reference, current, "cat", threshold)
    assert result == (approx(expected_pvalue, abs=1e-3), drift_detected)


@pytest.mark.parametrize(
    "reference, current",
    [
        (
            pd.Series(["a", np.nan, "b", "a", "a", "b"]),
            pd.Series(["b", "b", "a", "b", "b", "a"]),
        ),
        (
            pd.Series(["a", np.nan, "a", "a", "b"]),
            pd.Series(["a", "a", "a", "a", np.nan, "a"]),
        ),
        (
            pd.Series([np.inf, np.nan, np.nan, "a", "b", "a"]),
            pd.Series(["a", "a", np.inf, "a", "a", "b"]),
        ),
        (
            pd.Series([-np.inf, "b", np.nan, "b", "a", "b"]),
            pd.Series(["b", np.inf, "b", "a", "b", "a"]),
        ),
    ],
)
def test_for_null_fisher_exact(reference: pd.Series, current: pd.Series) -> None:
    """Null or inf values in either series must raise a ValueError."""
    expected_message = (
        "Null or inf values found in either reference_data or current_data. "
        "Please ensure that no null or inf values are present"
    )
    with pytest.raises(ValueError, match=expected_message):
        fisher_exact_test.func(reference, current, "cat", 0.1)


@pytest.mark.parametrize(
    "reference, current",
    (
        (
            pd.Series(["a", "c", "a", "a", "a", "b"]),
            pd.Series(["b", "b", "a", "b", "b", "a"]),
        ),
        (
            pd.Series(["a", 1, "a", 3, "b", "m"]),
            pd.Series(["a", "a", 2, "a", "b", "a"]),
        ),
    ),
)
def test_for_multiple_categories_fisher_exact(reference: pd.Series, current: pd.Series) -> None:
    """More than two unique categories in either series must raise a ValueError."""
    with pytest.raises(
        ValueError,
        match="Expects binary data for both reference and current, but found unique categories > 2",
    ):
        fisher_exact_test.func(reference, current, "cat", 0.1)


def test_mann_whitney() -> None:
reference = pd.Series([1, 2, 3, 4, 5, 6]).repeat([16, 18, 16, 14, 12, 12])
current = pd.Series([1, 2, 3, 4, 5, 6]).repeat([16, 16, 16, 16, 16, 8])
Expand Down

0 comments on commit 10ea83c

Please sign in to comment.