
Commit

Change condition of simple model check to use gain (#488)
* Change condition of simple model check to use gain

* Fix tests

* Fix notebooks

* Update condition

* Add tests

* Add tests to models

* Add header license

* Add changes to the gain

* Update NBs for bressler
matanper committed Jan 5, 2022
1 parent 421d332 commit a54f6fe
Showing 8 changed files with 1,012 additions and 676 deletions.
233 changes: 143 additions & 90 deletions deepchecks/checks/performance/simple_model_comparison.py
@@ -9,6 +9,7 @@
# ----------------------------------------------------------------------------
#
"""Module containing simple comparison check."""
from collections import defaultdict
from typing import Callable, Dict, Hashable, List, cast

import numpy as np
@@ -22,17 +23,18 @@
from deepchecks import CheckResult, Dataset
from deepchecks.base.check import ConditionResult, TrainTestBaseCheck
from deepchecks.checks.distribution.preprocessing import ScaledNumerics
from deepchecks.utils.strings import format_number
from deepchecks.utils.strings import format_percent
from deepchecks.utils.validation import validate_model
from deepchecks.errors import DeepchecksValueError
from deepchecks.utils.metrics import (
task_type_check,
ModelType,
initialize_multi_scorers,
get_scorers_list,
get_scores_ratio,
get_scorer_single
get_scorer_single,
get_gain
)
from deepchecks.utils.models import RandomModel


__all__ = ['SimpleModelComparison']
@@ -51,8 +53,8 @@ class SimpleModelComparison(TrainTestBaseCheck):
alternative_scorers : Dict[str, Callable], default None
An optional dictionary of scorer title to scorer functions/names. If none given, using default scorers.
For description about scorers see Notes below.
maximum_ratio : int
the ratio can be up to infinity so choose maximum value to limit to.
max_gain : float
the maximum allowed absolute value of the gain; the gain is clipped to the range [-max_gain, max_gain]
max_depth : int
the max depth of the tree (used only if simple model type is tree).
random_state : int
@@ -89,11 +91,11 @@ def my_mse(y_true, y_pred):
"""

def __init__(self, simple_model_type: str = 'constant', alternative_scorers: Dict[str, Callable] = None,
maximum_ratio: int = 50, max_depth: int = 3, random_state: int = 42):
max_gain: float = 50, max_depth: int = 3, random_state: int = 42):
super().__init__()
self.simple_model_type = simple_model_type
self.alternative_scorers = initialize_multi_scorers(alternative_scorers)
self.maximum_ratio = maximum_ratio
self.max_gain = max_gain
self.max_depth = max_depth
self.random_state = random_state

@@ -142,13 +144,27 @@ def run(
# Multiclass have different return type from the scorer, list of score per class instead of single score
if task_type in [ModelType.MULTICLASS, ModelType.BINARY]:
n_samples = label.groupby(label).count()
classes = test_dataset.classes

results_array = []
# Dict in format { Scorer : Dict { Class : Dict { Origin/Simple : score } } }
results_dict = {}
for scorer in scorers:
model_dict = defaultdict(dict)
for model_name, model_type, model_instance in models:
for class_score, class_value in zip(scorer(model_instance, test_dataset), classes):
model_dict[class_value][model_type] = class_score
results_array.append([model_name,
model_type,
class_score,
scorer.name,
class_value,
n_samples[class_value]
])
results_dict[scorer.name] = model_dict

results_df = pd.DataFrame(
[
[model_name, model_type, class_score, scorer.name, class_value, n_samples[class_value]]
for model_name, model_type, model_instance in models
for scorer in scorers # scorer returns numpy array with item per class
for class_score, class_value in zip(scorer(model_instance, test_dataset), test_dataset.classes)
],
results_array,
columns=['Model', 'Type', 'Value', 'Metric', 'Class', 'Number of samples']
)

@@ -170,16 +186,26 @@
)

else:
classes = None

results_array = []
# Dict in format { Scorer : Dict { Origin/Simple : score } }
results_dict = {}
for scorer in scorers:
model_dict = defaultdict(dict)
for model_name, model_type, model_instance in models:
score = scorer(model_instance, test_dataset)
model_dict[model_type] = score
results_array.append([model_name,
model_type,
score,
scorer.name,
label.count()
])
results_dict[scorer.name] = model_dict

results_df = pd.DataFrame(
[
[model_name,
model_type,
scorer(model_instance, test_dataset),
scorer.name,
label.count()]
for model_name, model_type, model_instance in models
for scorer in scorers
],
results_array,
columns=['Model', 'Type', 'Value', 'Metric', 'Number of samples']
)

@@ -200,7 +226,14 @@ def run(
.for_each_yaxis(lambda yaxis: yaxis.update(showticklabels=True))
)

return CheckResult({'scores': results_df, 'type': task_type}, display=fig)
# For each scorer, calculate the perfect score so that the conditions can later compute the gain
scorers_perfect = {scorer.name: scorer.score_perfect(test_dataset) for scorer in scorers}

return CheckResult({'scores': results_dict,
'type': task_type,
'scorers_perfect': scorers_perfect,
'classes': classes
}, display=fig)

def _create_simple_model(self, train_ds: Dataset, task_type: ModelType):
"""Create a simple model of given type (random/constant/tree) to the given dataset.
@@ -253,73 +286,93 @@ def _create_simple_model(self, train_ds: Dataset, task_type: ModelType):
simple_model.fit(train_ds.features_columns, train_ds.label_col)
return simple_model

def add_condition_ratio_not_less_than(self, min_allowed_ratio: float = 1.1, classes: List[Hashable] = None):
"""Add condition - require min allowed ratio between the given and the simple model.
def add_condition_gain_not_less_than(self,
min_allowed_gain: float = 0.1,
classes: List[Hashable] = None,
average: bool = False):
"""Add condition - require minimum allowed gain between the model and the simple model.
Args:
min_allowed_ratio (float): Min allowed ratio between the given and the simple model -
ratio is given model / simple model (if the scorer returns negative values we divide 1 by it)
classes (List[Hashable]): Used in multiclass models to limit condition only to given classes.
min_allowed_gain (float): Minimum allowed gain of the model over the simple model -
gain is defined as (model score - simple score) / (perfect score - simple score)
classes (List[Hashable]): Used in classification models to limit the condition to the given classes only.
average (bool): Used in classification models to indicate whether to run the condition on the average of
the classes or on each class individually
"""
def condition(result: Dict, max_ratio=self.maximum_ratio, class_list=classes) -> ConditionResult:
scores_df = result['scores']
task_type = result['type']
metrics = scores_df['Metric'].unique()

def get_ratio(df):
simple_score = df[df['Type'] == 'Simple']['Value'].iloc[0]
origin_score = df[df['Type'] == 'Origin']['Value'].iloc[0]
return get_scores_ratio(simple_score, origin_score, max_ratio)

fails = []
if task_type == ModelType.MULTICLASS:
if class_list is None:
class_list = scores_df['Class'].unique()
for metric in metrics:
failed_classes = []
for clas in class_list:
score_rows = scores_df[(scores_df['Metric'] == metric) & (scores_df['Class'] == clas)]
ratio = get_ratio(score_rows)
if ratio < min_allowed_ratio:
failed_classes.append(str(clas))
if failed_classes:
fails.append(f'"{metric}" - Classes: {", ".join(failed_classes)}')
else:
for metric in metrics:
score_rows = scores_df[(scores_df['Metric'] == metric)]
ratio = get_ratio(score_rows)
if ratio < min_allowed_ratio:
fails.append(f'"{metric}"')

if fails:
msg = f'Metrics failed: {", ".join(sorted(fails))}'
return ConditionResult(False, msg)
else:
return ConditionResult(True)

return self.add_condition('$$\\frac{\\text{model score}}{\\text{simple model score}} >= '
f'{format_number(min_allowed_ratio)}$$', condition)


class RandomModel:
"""Model used to randomly predict from given series of labels."""

def __init__(self):
self.labels = None

def fit(self, X, y): # pylint: disable=unused-argument,invalid-name
# The X is not used, but it is needed to be matching to sklearn `fit` signature
self.labels = y

def predict(self, X): # pylint: disable=invalid-name
return np.random.choice(self.labels, X.shape[0])

def predict_proba(self, X): # pylint: disable=invalid-name
classes = sorted(self.labels.unique().tolist())
predictions = self.predict(X)

def prediction_to_proba(y_pred):
proba = np.zeros(len(classes))
proba[classes.index(y_pred)] = 1
return proba
return np.apply_along_axis(prediction_to_proba, axis=1, arr=predictions)
name = f'Model performance gain over simple model must be at least {format_percent(min_allowed_gain)}'
if classes:
name = name + f' for classes {str(classes)}'
return self.add_condition(name,
condition,
include_classes=classes,
min_allowed_gain=min_allowed_gain,
max_gain=self.max_gain,
average=average)


def condition(result: Dict, include_classes=None, average=False, max_gain=None, min_allowed_gain=0) -> ConditionResult:
scores = result['scores']
task_type = result['type']
scorers_perfect = result['scorers_perfect']

fails = []
if task_type in [ModelType.MULTICLASS, ModelType.BINARY] and not average:
for metric, classes_scores in scores.items():
failed_classes = []
for clas, models_scores in classes_scores.items():
# Skip if class is not in class list
if include_classes is not None and clas not in include_classes:
continue

# If origin model is perfect, skip the gain calculation
if models_scores['Origin'] == scorers_perfect[metric]:
continue

gain = get_gain(models_scores['Simple'],
models_scores['Origin'],
scorers_perfect[metric],
max_gain)
if gain < min_allowed_gain:
failed_classes.append(str(clas))
if failed_classes:
fails.append(f'"{metric}" - Classes: {", ".join(failed_classes)}')
else:
if task_type in [ModelType.MULTICLASS, ModelType.BINARY]:
scores = average_scores(scores, include_classes)
for metric, models_scores in scores.items():
# If origin model is perfect, skip the gain calculation
if models_scores['Origin'] == scorers_perfect[metric]:
continue
gain = get_gain(models_scores['Simple'],
models_scores['Origin'],
scorers_perfect[metric],
max_gain)
if gain < min_allowed_gain:
fails.append(f'"{metric}"')

if fails:
msg = f'Metrics failed: {", ".join(sorted(fails))}'
return ConditionResult(False, msg)
else:
return ConditionResult(True)


def average_scores(scores, include_classes):
result = {}
for metric, classes_scores in scores.items():
origin_score = 0
simple_score = 0
total = 0
for clas, models_scores in classes_scores.items():
# Skip if class is not in class list
if include_classes is not None and clas not in include_classes:
continue
origin_score += models_scores['Origin']
simple_score += models_scores['Simple']
total += 1

result[metric] = {
'Origin': origin_score / total,
'Simple': simple_score / total
}
return result
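
For orientation, a minimal usage sketch of the reworked check and its gain-based condition (the dataset and model variables below, and the run call signature, are illustrative assumptions rather than part of this commit):

from deepchecks.checks.performance.simple_model_comparison import SimpleModelComparison

# train_ds, test_ds and model are assumed to be pre-built deepchecks Datasets and a fitted model.
# Build the check with the new gain cap and attach the gain-based condition: it passes only when
# (model score - simple score) / (perfect score - simple score) reaches min_allowed_gain
# for each metric (per class in classification, unless average=True).
check = SimpleModelComparison(simple_model_type='constant', max_gain=50)
check.add_condition_gain_not_less_than(min_allowed_gain=0.1, average=True)
# result = check.run(train_ds, test_ds, model)  # assumed TrainTestBaseCheck.run signature
# result.value['scores'], result.value['scorers_perfect']  # the data the condition operates on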
2 changes: 1 addition & 1 deletion deepchecks/suites/default_suites.py
@@ -90,7 +90,7 @@ def model_evaluation() -> Suite:
ConfusionMatrixReport(),
PerformanceReport().add_condition_train_test_relative_degradation_not_greater_than(),
RocReport().add_condition_auc_not_less_than(),
SimpleModelComparison().add_condition_ratio_not_less_than(),
SimpleModelComparison().add_condition_gain_not_less_than(),
ModelErrorAnalysis().add_condition_segments_performance_relative_difference_not_greater_than(),
CalibrationScore(),
TrustScoreComparison().add_condition_mean_score_percent_decline_not_greater_than(),
50 changes: 36 additions & 14 deletions deepchecks/utils/metrics.py
@@ -11,6 +11,7 @@
"""Utils module containing utilities for checks working with metrics."""
import typing as t
import enum
import warnings
from numbers import Number

import numpy as np
@@ -23,6 +24,8 @@
from deepchecks import base # pylint: disable=unused-import; it is used for type annotations
from deepchecks import errors
from deepchecks.utils import validation
from deepchecks.utils.strings import is_string_column
from deepchecks.utils.models import PerfectModel


__all__ = [
@@ -36,14 +39,13 @@
'DEFAULT_BINARY_SCORERS',
'DEFAULT_MULTICLASS_SCORERS',
'MULTICLASS_SCORERS_NON_AVERAGE',
'get_scores_ratio',
'DEFAULT_SINGLE_SCORER_MULTICLASS_NON_AVG',
'initialize_multi_scorers',
'get_scorer_single',
'task_type_validation'
'task_type_validation',
'get_gain'
]

from deepchecks.utils.strings import is_string_column


class ModelType(enum.Enum):
"""Enum containing supported task types."""
@@ -136,6 +138,20 @@ def __call__(self, model, dataset: 'base.Dataset'):
df = self.filter_nulls(dataset)
return self._run_score(model, df, dataset)

def score_perfect(self, dataset: 'base.Dataset'):
"""Calculate the perfect score of the current scorer for given dataset."""
df = self.filter_nulls(dataset)
perfect_model = PerfectModel()
perfect_model.fit(None, dataset.label_col)
score = self._run_score(perfect_model, df, dataset)
if isinstance(score, np.ndarray):
# We expect the perfect score to be equal for all the classes, so take the first one
first_score = score[0]
if any(score != first_score):
warnings.warn(f'Scorer {self.name} returns a different perfect score for different classes')
return first_score
return score

def validate_fitting(self, model, dataset: 'base.Dataset', should_return_array: bool):
"""Validate given scorer for the model and dataset."""
df = self.filter_nulls(dataset)
@@ -313,13 +329,19 @@ def initialize_multi_scorers(alternative_scorers: t.Optional[t.Mapping[str, t.Ca
return [DeepcheckScorer(scorer, name) for name, scorer in alternative_scorers.items()]


def get_scores_ratio(train_score: float, test_score: float, max_ratio=np.Inf) -> float:
"""Return the ratio of test metric compared to train metric."""
if train_score == 0:
return max_ratio
else:
ratio = test_score / train_score
if train_score < 0 and test_score < 0:
ratio = 1 / ratio
ratio = min(max_ratio, ratio)
return ratio
def get_gain(base_score, score, perfect_score, max_gain):
"""Get gain between base score and score compared to the distance from the perfect score."""
distance_from_perfect = perfect_score - base_score
scores_diff = score - base_score
if distance_from_perfect == 0:
# If both base score and score are perfect, return 0 gain
if scores_diff == 0:
return 0
# otherwise the base (simple) score is already perfect and the given score is worse, so return -max_gain
return -max_gain
ratio = scores_diff / distance_from_perfect
if ratio < -max_gain:
return -max_gain
if ratio > max_gain:
return max_gain
return ratio
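
As a quick sanity check of the new gain formula, an illustrative snippet (the numbers are made up, not taken from this diff):

from deepchecks.utils.metrics import get_gain

# Simple model scores 0.5, the real model 0.75, perfect score is 1.0:
# gain = (0.75 - 0.5) / (1.0 - 0.5) = 0.5
assert get_gain(0.5, 0.75, 1.0, max_gain=50) == 0.5
# The simple model is already perfect, so any worse model is assigned -max_gain.
assert get_gain(1.0, 0.8, 1.0, max_gain=50) == -50
# A ratio beyond the cap (here 25.5 / 0.5 = 51) is clipped to max_gain.
assert get_gain(0.5, 26.0, 1.0, max_gain=50) == 50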
