# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""Module containing the single dataset performance check."""
from numbers import Number
from typing import Callable, Dict, List, Union
import pandas as pd
from deepchecks.core import CheckResult
from deepchecks.core.check_utils.single_dataset_performance_base import BaseSingleDatasetPerformance
from deepchecks.nlp.base_checks import SingleDatasetCheck
from deepchecks.nlp.context import Context
from deepchecks.nlp.metric_utils.scorers import infer_on_text_data
__all__ = ['SingleDatasetPerformance']
class SingleDatasetPerformance(SingleDatasetCheck, BaseSingleDatasetPerformance):
    """Summarize given model performance on a dataset based on selected scorers.

    Parameters
    ----------
    scorers : Union[List[str], Dict[str, Union[str, Callable]]], default: None
        List of scorers to use. If None, default scorers are used.
        Scorers can be supplied as a list of scorer names or as a dictionary
        mapping names to scoring functions.
    n_samples : int, default: 10_000
        Maximum number of samples to use for this check.
    """

    def __init__(self,
                 scorers: Union[List[str], Dict[str, Union[str, Callable]]] = None,
                 n_samples: int = 10_000,
                 **kwargs):
        super().__init__(**kwargs)
        self.scorers = scorers
        self.n_samples = n_samples
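
    # Illustrative note (not part of the original module): `scorers` may be
    # given either as a list of built-in scorer names or as a dict mapping a
    # display name to a callable, e.g. (hypothetical values):
    #     SingleDatasetPerformance(scorers=['accuracy', 'f1_macro'])
    #     SingleDatasetPerformance(scorers={'my_f1': my_f1_callable})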

    def run_logic(self, context: Context, dataset_kind) -> CheckResult:
        """Run check."""
        dataset = context.get_data_by_kind(dataset_kind)
        dataset = dataset.sample(self.n_samples, random_state=context.random_state)
        model = context.model
        scorers = context.get_scorers(self.scorers, use_avg_defaults=False)

        results = []
        for scorer in scorers:
            scorer_value = infer_on_text_data(scorer, model, dataset)
            if isinstance(scorer_value, Number):
                # A scalar score applies to the whole dataset, not to a single class.
                results.append([pd.NA, scorer.name, scorer_value])
            else:
                # A mapping of class name to score yields one result row per class.
                results.extend(
                    [[class_name, scorer.name, class_score]
                     for class_name, class_score in scorer_value.items()])
        results_df = pd.DataFrame(results, columns=['Class', 'Metric', 'Value'])

        if context.with_display:
            display = [results_df]
        else:
            display = []
        return CheckResult(results_df, header='Single Dataset Performance', display=display)
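
# ----------------------------------------------------------------------------
# Usage sketch (illustrative only, not part of the module). Assumes the
# deepchecks NLP API for building a TextData object and running a check; the
# texts, labels and predictions below are hypothetical placeholders.
#
#     from deepchecks.nlp import TextData
#     from deepchecks.nlp.checks import SingleDatasetPerformance
#
#     data = TextData(raw_text=['great product', 'terrible service'],
#                     label=[1, 0], task_type='text_classification')
#     check = SingleDatasetPerformance(scorers=['accuracy', 'f1_macro'])
#     result = check.run(data, predictions=[1, 0])
#     result.show()
# ----------------------------------------------------------------------------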