-
Notifications
You must be signed in to change notification settings - Fork 247
/
train_test_samples_mix.py
51 lines (42 loc) · 1.95 KB
/
train_test_samples_mix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""module contains base logic for the train-test samples mix checks."""
import abc
import typing as t
from typing_extensions import Self
from deepchecks.core import ConditionCategory, ConditionResult
from deepchecks.utils.strings import format_percent
__all__ = ['TrainTestSamplesMixAbstract']
class TrainTestSamplesMixAbstract(abc.ABC):
    """Shared base for the train-test samples mix checks."""

    # Annotation only, no value is assigned here.
    # NOTE(review): presumably supplied by the concrete check class this base
    # is combined with - confirm against the subclasses.
    add_condition: t.Callable[..., t.Any]

    def add_condition_duplicates_ratio_less_or_equal(self: Self, max_ratio: float = 0.05) -> Self:
        """Add condition - ratio of test samples that appear in train data must not exceed the threshold.

        The condition reads the ``'ratio'`` entry of the check result, passes when it is
        less than or equal to ``max_ratio`` and fails otherwise.

        Parameters
        ----------
        max_ratio : float , default: 0.05
            Max allowed ratio of test data samples to appear in train data

        Returns
        -------
        Self
            Whatever ``add_condition`` returns for the registered condition.
        """
        condition_title = (
            f'Percentage of test data samples that appear in train data '
            f'is less or equal to {format_percent(max_ratio)}'
        )

        def _ratio_within_limit(check_value: dict) -> ConditionResult:
            mix_ratio = check_value['ratio']

            # A falsy ratio (e.g. 0) gets a dedicated message instead of a formatted percentage.
            if mix_ratio:
                message = f'Percent of test data samples that appear in train data: {format_percent(mix_ratio)}'
            else:
                message = 'No samples mix found'

            if mix_ratio <= max_ratio:
                outcome = ConditionCategory.PASS
            else:
                outcome = ConditionCategory.FAIL
            return ConditionResult(outcome, message)

        return self.add_condition(condition_title, _ratio_within_limit)