# ----------------------------------------------------------------------------
# Copyright (C) 2021-2023 Deepchecks (https://www.deepchecks.com)
#
# This file is part of Deepchecks.
# Deepchecks is distributed under the terms of the GNU Affero General
# Public License (version 3 or later).
# You should have received a copy of the GNU Affero General Public License
# along with Deepchecks. If not, see <http://www.gnu.org/licenses/>.
# ----------------------------------------------------------------------------
#
"""Module for base tabular context."""
import typing as t
import numpy as np
import pandas as pd
from deepchecks.core.context import BaseContext
from deepchecks.core.errors import (DatasetValidationError, DeepchecksNotSupportedError, DeepchecksValueError,
ModelValidationError)
from deepchecks.tabular._shared_docs import docstrings
from deepchecks.tabular.dataset import Dataset
from deepchecks.tabular.metric_utils import DeepcheckScorer, get_default_scorers, init_validate_scorers
from deepchecks.tabular.metric_utils.scorers import validate_proba
from deepchecks.tabular.utils.feature_importance import (calculate_feature_importance_or_none,
validate_feature_importance)
from deepchecks.tabular.utils.task_inference import (get_all_labels, infer_classes_from_model,
infer_task_type_by_class_number, infer_task_type_by_labels)
from deepchecks.tabular.utils.task_type import TaskType
from deepchecks.tabular.utils.validation import (ensure_predictions_proba, ensure_predictions_shape,
model_type_validation, validate_model)
from deepchecks.utils.docref import doclink
from deepchecks.utils.logger import get_logger
from deepchecks.utils.plot import DEFAULT_DATASET_NAMES
from deepchecks.utils.typing import BasicModel
__all__ = [
'Context', '_DummyModel'
]


class _DummyModel:
    """Dummy model class used for inference with static predictions from the user.

    Parameters
    ----------
    train: Dataset
        Dataset, representing data an estimator was fitted on.
    test: Dataset
        Dataset, representing data an estimator predicts on.
    y_pred_train: t.Optional[np.ndarray]
        Array of the model predictions over the train dataset.
    y_pred_test: t.Optional[np.ndarray]
        Array of the model predictions over the test dataset.
    y_proba_train: t.Optional[np.ndarray]
        Array of the model prediction probabilities over the train dataset.
    y_proba_test: t.Optional[np.ndarray]
        Array of the model prediction probabilities over the test dataset.
    validate_data_on_predict: bool, default = True
        If True, before predicting, validates that the received data samples have the same index as the original data.
    """

    feature_df_list: t.List[pd.DataFrame]
    predictions: pd.DataFrame
    proba: pd.DataFrame

    def __init__(
            self,
            test: Dataset,
            y_proba_test: t.Optional[np.ndarray] = None,
            y_pred_test: t.Optional[np.ndarray] = None,
            train: t.Optional[Dataset] = None,
            y_pred_train: t.Optional[np.ndarray] = None,
            y_proba_train: t.Optional[np.ndarray] = None,
            validate_data_on_predict: bool = True,
            model_classes: t.Optional[t.List[t.Any]] = None
    ):
        if train is not None and test is not None:
            # check if datasets have same indexes
            train_index = train.data.index
            test_index = test.data.index
            if set(train_index) & set(test_index):
                train.data.index = [f'train-{it}' for it in train_index]
                test.data.index = [f'test-{it}' for it in test_index]
                get_logger().warning(
                    'train and test datasets have common index - adding "train"/"test" '
                    'prefixes. To avoid that provide datasets with no common indexes '
                    'or pass the model object instead of the predictions.')

        feature_df_list = []
        predictions = []
        probas = []

        for dataset, y_pred, y_proba in (
            (train, y_pred_train, y_proba_train),
            (test, y_pred_test, y_proba_test),
        ):
            if y_pred is not None and not isinstance(y_pred, np.ndarray):
                y_pred = np.array(y_pred)
            if y_proba is not None and not isinstance(y_proba, np.ndarray):
                y_proba = np.array(y_proba)
            if dataset is not None:
                feature_df_list.append(dataset.features_columns)
                if y_pred is None and y_proba is not None:
                    validate_proba(y_proba, model_classes)
                    y_pred = np.argmax(y_proba, axis=-1)
                    y_pred = np.array(model_classes)[y_pred]
                if y_pred is not None:
                    if len(y_pred.shape) > 1 and y_pred.shape[1] == 1:
                        y_pred = y_pred[:, 0]
                    ensure_predictions_shape(y_pred, dataset.data)
                    y_pred_ser = pd.Series(list(y_pred), index=dataset.data.index)
                    predictions.append(y_pred_ser)
                    if y_proba is not None:
                        ensure_predictions_proba(y_proba, y_pred)
                        proba_df = pd.DataFrame(data=y_proba)
                        proba_df.index = dataset.data.index
                        probas.append(proba_df)

        self.predictions = pd.concat(predictions, axis=0) if predictions else None
        self.probas = pd.concat(probas, axis=0) if probas else None
        self.feature_df_list = feature_df_list
        self.validate_data_on_predict = validate_data_on_predict

        if self.predictions is not None:
            self.predict = self._predict

        if self.probas is not None:
            self.predict_proba = self._predict_proba
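
        # Note (comment added for clarity): `predict` / `predict_proba` are bound only when the corresponding
        # static data was supplied, so callers that probe the model with hasattr(model, 'predict_proba') see an
        # accurate picture of which prediction kinds are available.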

    def _validate_data(self, data: pd.DataFrame):
        data = data.sample(min(100, len(data)))
        for feature_df in self.feature_df_list:
            # If all indices are found, then test for equality on the actual data (a statistically significant sample)
            if set(data.index).issubset(set(feature_df.index)):
                sample_data = np.unique(np.random.choice(data.index, 30))
                if feature_df.loc[sample_data].equals(data.loc[sample_data]):
                    return
                else:
                    break

        raise DeepchecksValueError('Data that has not been seen before passed for inference with static '
                                   'predictions. Pass a real model to resolve this')

    def _predict(self, data: pd.DataFrame):
        """Predict on given data by the data indexes."""
        if self.validate_data_on_predict:
            self._validate_data(data)
        return self.predictions.loc[data.index].to_numpy()

    def _predict_proba(self, data: pd.DataFrame):
        """Predict probabilities on given data by the data indexes."""
        if self.validate_data_on_predict:
            self._validate_data(data)
        return self.probas.loc[data.index].to_numpy()

    def fit(self, *args, **kwargs):
        """Just for python 3.6 (sklearn validates fit method)."""


@docstrings
class Context(BaseContext):
    """Contains all the data + properties the user has passed to a check/suite, and validates it seamlessly.

    Parameters
    ----------
    train: Union[Dataset, pd.DataFrame, None] , default: None
        Dataset or DataFrame object, representing data an estimator was fitted on
    test: Union[Dataset, pd.DataFrame, None] , default: None
        Dataset or DataFrame object, representing data an estimator predicts on
    model: Optional[BasicModel] , default: None
        A scikit-learn-compatible fitted estimator instance
    {additional_context_params:indent}
    """

    def __init__(
            self,
            train: t.Union[Dataset, pd.DataFrame, None] = None,
            test: t.Union[Dataset, pd.DataFrame, None] = None,
            model: t.Optional[BasicModel] = None,
            feature_importance: t.Optional[pd.Series] = None,
            feature_importance_force_permutation: bool = False,
            feature_importance_timeout: int = 120,
            with_display: bool = True,
            y_pred_train: t.Optional[np.ndarray] = None,
            y_pred_test: t.Optional[np.ndarray] = None,
            y_proba_train: t.Optional[np.ndarray] = None,
            y_proba_test: t.Optional[np.ndarray] = None,
            model_classes: t.Optional[t.List] = None,
    ):
        # Validations
        if train is None and test is None and model is None:
            raise DeepchecksValueError('At least one dataset (or model) must be passed to the method!')
        if train is not None:
            train = Dataset.cast_to_dataset(train)
            if train.name is None:
                train.name = DEFAULT_DATASET_NAMES[0]
        if test is not None:
            test = Dataset.cast_to_dataset(test)
            if test.name is None:
                test.name = DEFAULT_DATASET_NAMES[1]
        # If both datasets are given, validate that they fit each other
        if train and test:
            if test.has_label() and train.has_label() and not Dataset.datasets_share_label(train, test):
                raise DatasetValidationError('train and test are required to have and to share the same label')
            if not Dataset.datasets_share_features(train, test):
                raise DatasetValidationError('train and test are required to share the same feature columns')
            if not Dataset.datasets_share_categorical_features(train, test):
                raise DatasetValidationError(
                    'train and test datasets should share '
                    'the same categorical features. Possible reason is that some columns were '
                    'inferred incorrectly as categorical features. To fix this, manually edit the '
                    'categorical features using Dataset(cat_features=<list_of_features>)'
                )
            if not Dataset.datasets_share_index(train, test):
                raise DatasetValidationError('train and test are required to share the same index column')
            if not Dataset.datasets_share_date(train, test):
                raise DatasetValidationError('train and test are required to share the same date column')
        if test and not train:
            raise DatasetValidationError('Can\'t initialize context with only test. If you have a single dataset, '
                                         'initialize it as train')

        self._calculated_importance = feature_importance is not None or model is None
        if model is not None:
            # Here validate only the model type; later, if needed, we validate that it can predict on the data
            model_type_validation(model)
        if feature_importance is not None:
            feature_importance = validate_feature_importance(feature_importance, train.features)
        if model_classes is not None and len(model_classes) == 0:
            raise DeepchecksValueError('Received empty model_classes')
        if model_classes and sorted(model_classes) != model_classes:
            supported_models_link = doclink(
                'supported-prediction-format',
                template='For more information please refer to the Supported Models guide {link}')
            raise DeepchecksValueError(f'Received unsorted model_classes. {supported_models_link}')
        if model_classes is None:
            model_classes = infer_classes_from_model(model)

        labels = None
        if train and train.label_type:
            task_type = train.label_type
        elif model_classes:
            task_type = infer_task_type_by_class_number(len(model_classes))
        else:
            labels = get_all_labels(model, train, test, y_pred_train, y_pred_test)
            task_type = infer_task_type_by_labels(labels)
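
        # Note on the task-type inference above (comment added for clarity): the task type is taken from the train
        # dataset's label_type when available, otherwise it is inferred from the number of model classes, and only
        # as a last resort from the observed label values themselves.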

        observed_classes = None
        if (model is None and
                (y_pred_train is not None or y_pred_test is not None or y_proba_train is not None
                 or y_proba_test is not None)):
            # If there are no predictions, we use the observed classes to map between the proba columns and the classes
            if y_pred_train is None and model_classes is None:
                # Avoid calculating the labels twice
                labels = labels if labels is not None else get_all_labels(model, train, test, y_pred_train,
                                                                          y_pred_test)
                observed_classes = sorted(labels.dropna().unique().tolist())

            model = _DummyModel(train=train, test=test,
                                y_pred_train=y_pred_train, y_pred_test=y_pred_test,
                                y_proba_test=y_proba_test, y_proba_train=y_proba_train,
                                # Use the model classes if they exist, else the observed classes
                                model_classes=model_classes or observed_classes)

        self._task_type = task_type
        self._observed_classes = observed_classes
        self._model_classes = model_classes
        self._train = train
        self._test = test
        self._model = model
        self._feature_importance_force_permutation = feature_importance_force_permutation
        self._feature_importance = feature_importance
        self._feature_importance_timeout = feature_importance_timeout
        self._importance_type = None
        self._validated_model = False
        self._with_display = with_display

    # Properties
    # Validations note: We know train & test fit each other, so all validations can be run only on train

    @property
    def model(self) -> BasicModel:
        """Return & validate model if model exists, otherwise raise error."""
        if self._model is None:
            raise DeepchecksNotSupportedError('Check is irrelevant for Datasets without model')
        if not self._validated_model:
            if self._train:
                validate_model(self._train, self._model)
            self._validated_model = True
        return self._model

    @property
    def model_classes(self) -> t.List:
        """Return ordered list of possible label classes for classification tasks or None for regression."""
        if self._model_classes is None and self.task_type in (TaskType.BINARY, TaskType.MULTICLASS):
            # If infer_task_type didn't find classes on the model, and the user didn't pass any, use the observed
            # classes instead
            get_logger().warning('Could not find model\'s classes, using the observed classes. '
                                 'In order to make sure the classes used by the model are inferred correctly, '
                                 'please use the model_classes argument')
            self._model_classes = self.observed_classes
        return self._model_classes

    @property
    def observed_classes(self) -> t.List:
        """Return the observed classes in both train and test. None for regression."""
        # If the observed classes were not cached yet, calculate them
        if self._observed_classes is None and self.task_type in (TaskType.BINARY, TaskType.MULTICLASS):
            labels = get_all_labels(self._model, self._train, self._test)
            self._observed_classes = sorted(labels.dropna().unique().tolist())
        return self._observed_classes
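
    # Clarifying note (comment added for this file, not official documentation): `model_classes` are the classes
    # the model itself can output (reported by the model or passed by the user), while `observed_classes` are the
    # label values actually present in the train/test data. For example, a model fitted elsewhere on classes
    # [0, 1, 2] but evaluated here on datasets whose labels contain only 0 and 1 would have
    # model_classes == [0, 1, 2] and observed_classes == [0, 1].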

    @property
    def model_name(self):
        """Return model name."""
        return type(self.model).__name__

    @property
    def task_type(self) -> TaskType:
        """Return task type based on calculated classes argument."""
        return self._task_type

    @property
    def feature_importance(self) -> t.Optional[pd.Series]:
        """Return feature importance, or None if not possible."""
        if not self._calculated_importance:
            if self._model and (self._train or self._test):
                permutation_kwargs = {'timeout': self._feature_importance_timeout}
                dataset = self.test if self.have_test() else self.train
                importance, importance_type = calculate_feature_importance_or_none(
                    self._model, dataset, self.model_classes, self._observed_classes, self.task_type,
                    self._feature_importance_force_permutation, permutation_kwargs
                )
                self._feature_importance = importance
                self._importance_type = importance_type
            else:
                self._feature_importance = None
            self._calculated_importance = True
        return self._feature_importance
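
    # Added note: feature importance is computed lazily on first access and cached. If the model does not expose a
    # usable built-in importance (or permutation is forced via feature_importance_force_permutation), the helper
    # above is expected to fall back to permutation importance bounded by feature_importance_timeout seconds, and
    # to return None when neither is possible.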

    @property
    def feature_importance_timeout(self) -> t.Optional[int]:
        """Return feature importance timeout."""
        return self._feature_importance_timeout

    @property
    def feature_importance_type(self) -> t.Optional[str]:
        """Return feature importance type if feature importance is available, else None."""
        # Calling feature_importance first, because _importance_type is assigned only after feature importance is
        # calculated.
        if self.feature_importance is not None:
            return self._importance_type
        return None

    def have_test(self):
        """Return whether there is test dataset defined."""
        return self._test is not None

    def assert_classification_task(self):
        """Assert the task_type is classification."""
        if self.task_type == TaskType.REGRESSION and self.train.has_label():
            raise ModelValidationError('Check is irrelevant for regression tasks')

    def assert_regression_task(self):
        """Assert the task type is regression."""
        if self.task_type != TaskType.REGRESSION and self.train.has_label():
            raise ModelValidationError('Check is irrelevant for classification tasks')

    def get_scorers(self,
                    scorers: t.Union[t.Mapping[str, t.Union[str, t.Callable]], t.List[str]] = None,
                    use_avg_defaults=True) -> t.List[DeepcheckScorer]:
        """Return initialized & validated scorers if provided or default scorers otherwise.

        Parameters
        ----------
        scorers : Union[List[str], Dict[str, Union[str, Callable]]], default: None
            List of scorers to use. If None, use default scorers.
            Scorers can be supplied as a list of scorer names or as a dictionary of names and functions.
        use_avg_defaults : bool, default True
            If no scorers were provided, for classification, determines whether to use default scorers that return
            an averaged metric, or default scorers that return a metric per class.

        Returns
        -------
        List[DeepcheckScorer]
            A list of initialized & validated scorers.
        """
        scorers = scorers or get_default_scorers(self.task_type, use_avg_defaults)
        return init_validate_scorers(scorers, self.model, self.train, self.model_classes, self.observed_classes)
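
    # Illustrative sketch (hypothetical values, added comment): scorers may be passed either as a list of known
    # scorer names or as a mapping from display name to a scorer name / callable, e.g.
    #
    #     context.get_scorers(['accuracy', 'recall_macro'])
    #     context.get_scorers({'My F1': 'f1_macro', 'My Loss': custom_callable})
    #
    # where `context` is a Context instance and `custom_callable` stands for any sklearn-compatible scorer the
    # user defines.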

    def get_single_scorer(self,
                          scorer: t.Mapping[str, t.Union[str, t.Callable]] = None,
                          use_avg_defaults=True) -> DeepcheckScorer:
        """Return an initialized & validated scorer if provided, or a default scorer otherwise.

        Parameters
        ----------
        scorer : Mapping[str, Union[str, Callable]], default: None
            Scorer to use. If None, a default scorer is used.
            The scorer can be supplied as a dictionary mapping a display name to a scorer name or function;
            only the first entry is used.
        use_avg_defaults : bool, default True
            If no scorer was provided, for classification, determines whether to use a default scorer that returns
            an averaged metric, or a default scorer that returns a metric per class.

        Returns
        -------
        DeepcheckScorer
            An initialized & validated scorer.
        """
        scorer = scorer or get_default_scorers(self.task_type, use_avg_defaults)
        # The single scorer is the first one in the dict
        scorer_name = next(iter(scorer))
        single_scorer_dict = {scorer_name: scorer[scorer_name]}
        return init_validate_scorers(single_scorer_dict, self.model, self.train, self.model_classes,
                                     self.observed_classes)[0]
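
# Illustrative end-to-end sketch (added example; the variable names are placeholders, not part of this module):
#
#     from deepchecks.tabular import Dataset
#     train_ds = Dataset(train_df, label='target', cat_features=['city'])
#     test_ds = Dataset(test_df, label='target', cat_features=['city'])
#
#     # Either pass a fitted model...
#     ctx = Context(train=train_ds, test=test_ds, model=fitted_model)
#
#     # ...or pass static predictions/probabilities, which are wrapped in a _DummyModel internally.
#     ctx = Context(train=train_ds, test=test_ds,
#                   y_proba_train=proba_train, y_proba_test=proba_test,
#                   model_classes=[0, 1])
#
#     ctx.task_type            # the inferred TaskType
#     ctx.get_scorers()        # default scorers for the inferred task type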