Commit

Aggregate prediction explanations for derived features (#1901)

* Rough draft.

* Displaying pipeline features when the feature is not in the original data.

* Displaying aggregated shap values.

* Fixing some tests.

* Unit tests passing.

* Aggregating all features we can aggregate. Adding tests.

* Adding comment explaining feature selection in _make_rows.

* Adding comments to _aggregate_shap_values.

* Fixing docstring in _aggregate_shap_values.

* Adding PR 1901 to release notes.

* Adding extra test case for when some created features are dropped.

* Adding comments to _user_interface.

* Updating comment in _aggregate_shap_values_dict.

* Fixing docstrings.

* Updating docstring in _aggregate_shap_values_dict.
freddyaboulton committed Mar 3, 2021
1 parent c499006 commit 3b01866
Showing 8 changed files with 500 additions and 77 deletions.
1 change: 1 addition & 0 deletions docs/source/release_notes.rst
@@ -7,6 +7,7 @@ Release Notes
* Added ``DataCheckAction`` class and ``DataCheckActionCode`` enum :pr:`1896`
* Updated ``Woodwork`` requirement to ``v0.0.10`` :pr:`1900`
* Added utility method to create list of components from a list of ``DataCheckAction`` :pr:`1907`
* Added aggregation of SHAP values in prediction explanations for features whose provenance we know, e.g. features created by the OHE, text, and date-time components :pr:`1901`
* Fixes
* Added metaclass for time series pipelines and fix binary classification pipeline ``predict`` not using objective if it is passed as a named argument :pr:`1874`
* Fixed stack trace in prediction explanation functions caused by mixed string/numeric pandas column names :pr:`1871`
65 changes: 65 additions & 0 deletions evalml/model_understanding/prediction_explanations/_algorithms.py
@@ -1,4 +1,5 @@
import warnings
from operator import add

import numpy as np
import shap
@@ -110,6 +111,70 @@ def _compute_shap_values(pipeline, features, training_data=None):
raise ValueError(f"Unknown shap_values datatype {str(type(shap_values))}!")


def _aggregate_shap_values_dict(values, provenance):
    """Aggregates SHAP values across features created from a common feature.

    For example, say the pipeline has a text featurizer that creates the columns LSA_0, LSA_1, PolarityScore,
    MeanCharacters, and DiversityScore from a column called "text_feature". The values dictionary input to this
    function will have a key for each of the features created by the text featurizer, but it will not have a key
    for the original "text_feature" column. It will look like this:

        {"LSA_0": [0.3], "LSA_1": [0.2], "PolarityScore": [0.1], "MeanCharacters": [0.05], "DiversityScore": [-0.1], ...}

    After this function, the values dictionary will look like: {"text_feature": [0.55]}

    This aggregation will happen for all features for which we know the provenance/lineage. Other features will
    be left as they are.

    Arguments:
        values (dict): A mapping of feature names to a list of SHAP values for each data point.
        provenance (dict): A mapping from a feature in the original data to the names of the features that were
            created from that feature.

    Returns:
        dict: Mapping from feature name to SHAP values.
    """
    # Invert the provenance mapping so that each derived feature points to its parent feature.
    child_to_parent = {}
    for parent_feature, children in provenance.items():
        for child in children:
            if child in values:
                child_to_parent[child] = parent_feature

    agg_values = {}
    for feature_name, shap_list in values.items():
        # Only aggregate features for which we know the parent feature
        if feature_name in child_to_parent:
            parent = child_to_parent[feature_name]
            if parent not in agg_values:
                agg_values[parent] = [0] * len(shap_list)
            # Element-wise sum without numpy
            agg_values[parent] = list(map(add, agg_values[parent], shap_list))
        else:
            agg_values[feature_name] = shap_list
    return agg_values
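
For illustration only (not part of the committed diff), here is how the helper above behaves on the text-featurizer example from its docstring; the extra "age" column is made up here to show the pass-through case for a feature with no known provenance:

from evalml.model_understanding.prediction_explanations._algorithms import (
    _aggregate_shap_values_dict,
)

values = {"LSA_0": [0.3], "LSA_1": [0.2], "PolarityScore": [0.1],
          "MeanCharacters": [0.05], "DiversityScore": [-0.1], "age": [0.4]}
provenance = {"text_feature": ["LSA_0", "LSA_1", "PolarityScore",
                               "MeanCharacters", "DiversityScore"]}

# The five derived columns collapse onto "text_feature"; "age" is passed through unchanged.
print(_aggregate_shap_values_dict(values, provenance))
# {'text_feature': [0.55], 'age': [0.4]}  (up to floating-point rounding)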


def _aggregate_shap_values(values, provenance):
    """Aggregates SHAP values across features created from a common feature.

    Arguments:
        values (dict or list(dict)): A mapping of feature names to a list of SHAP values for each data point,
            or a list of such mappings (one per class) for multiclass problems.
        provenance (dict): A mapping from a feature in the original data to the names of the features that were
            created from that feature.

    Returns:
        dict or list(dict): Same structure as the input, with the SHAP values of derived features summed onto
            their original features.
    """
    if isinstance(values, dict):
        return _aggregate_shap_values_dict(values, provenance)
    else:
        # For multiclass problems, values is a list with one dictionary of SHAP values per class.
        return [_aggregate_shap_values_dict(class_values, provenance) for class_values in values]
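
And a quick sketch (again illustrative, mirroring the unit test added in this commit) of the multiclass path, where the SHAP values arrive as one dictionary per class and each dictionary is aggregated independently:

from evalml.model_understanding.prediction_explanations._algorithms import _aggregate_shap_values

per_class_values = [{"a_0": [0.5], "a_1": [1.2], "b": [0.5]},
                    {"a_0": [-0.5], "a_1": [1.0], "b": [0.1]}]
print(_aggregate_shap_values(per_class_values, provenance={"a": ["a_0", "a_1"]}))
# [{'a': [1.7], 'b': [0.5]}, {'a': [0.5], 'b': [0.1]}]  (up to floating-point rounding)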


def _normalize_values_dict(values):
"""Normalizes SHAP values by dividing by the sum of absolute values for each feature.
180 changes: 133 additions & 47 deletions evalml/model_understanding/prediction_explanations/_user_interface.py

Large diffs are not rendered by default.

10 changes: 6 additions & 4 deletions evalml/model_understanding/prediction_explanations/explainers.py
@@ -18,7 +18,7 @@
from evalml.utils.gen_utils import drop_rows_with_nans

# Container for all of the pipeline-related data we need to create reports. Helps standardize APIs of report makers.
_ReportData = namedtuple("ReportData", ["pipeline", "pipeline_features",
_ReportData = namedtuple("ReportData", ["pipeline", "pipeline_features", "input_features",
"y_true", "y_pred", "y_pred_values", "errors", "index_list", "metric"])


@@ -53,7 +53,9 @@ def explain_prediction(pipeline, input_features, y, index_to_explain, top_k_feat
raise ValueError(f"Parameter output_format must be either text, dict, or dataframe. Received {output_format}")
if any([x < 0 or x >= len(input_features) for x in [index_to_explain]]):
raise ValueError(f"Explained indices should be between 0 and {len(input_features) - 1}")
return _make_single_prediction_shap_table(pipeline, pipeline_features, index_to_explain, top_k_features, include_shap_values,
return _make_single_prediction_shap_table(pipeline, pipeline_features,
input_features,
index_to_explain, top_k_features, include_shap_values,
output_format=output_format)
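
A hedged usage sketch (not part of the diff) of this entry point: it assumes `pipeline` is an evalml binary classification pipeline already fitted on the fraud demo data. With this change, the SHAP values of derived columns (one-hot encoded, text, date-time) are aggregated and reported under the original column names.

from evalml.demos import load_fraud
from evalml.model_understanding.prediction_explanations.explainers import explain_prediction

X, y = load_fraud(n_rows=100)
# `pipeline` is assumed to be a fitted evalml binary classification pipeline.
report = explain_prediction(pipeline, input_features=X, y=y, index_to_explain=0,
                            top_k_features=3, include_shap_values=True,
                            output_format="text")
print(report)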


@@ -94,7 +96,7 @@ def explain_predictions(pipeline, input_features, y, indices_to_explain, top_k_f

pipeline_features = pipeline.compute_estimator_features(input_features, y).to_dataframe()

data = _ReportData(pipeline, pipeline_features, y_true=y, y_pred=None,
data = _ReportData(pipeline, pipeline_features, input_features, y_true=y, y_pred=None,
y_pred_values=None, errors=None, index_list=indices_to_explain, metric=None)

report_creator = _report_creator_factory(data, report_type="explain_predictions",
@@ -180,7 +182,7 @@ def explain_predictions_best_worst(pipeline, input_features, y_true, num_to_expl

pipeline_features = pipeline.compute_estimator_features(input_features, y_true).to_dataframe()

data = _ReportData(pipeline, pipeline_features, y_true, y_pred, y_pred_values, errors, index_list, metric)
data = _ReportData(pipeline, pipeline_features, input_features, y_true, y_pred, y_pred_values, errors, index_list, metric)

report_creator = _report_creator_factory(data, report_type="explain_predictions_best_worst",
output_format=output_format, top_k_features=top_k_features,
6 changes: 6 additions & 0 deletions evalml/tests/conftest.py
@@ -7,6 +7,7 @@
from sklearn import datasets
from skopt.space import Integer, Real

from evalml.demos import load_fraud
from evalml.model_family import ModelFamily
from evalml.objectives.utils import (
get_core_objectives,
@@ -536,3 +537,8 @@ def _make_data_type(data_type, data):
return data

return _make_data_type


@pytest.fixture
def fraud_100():
    return load_fraud(n_rows=100)
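
A hypothetical test (not part of the diff) showing how the new fixture might be consumed; the `to_dataframe()`/`to_series()` conversions assume `load_fraud` returns Woodwork data structures, as the rest of this diff does for pipeline features.

def test_uses_fraud_fixture(fraud_100):
    # The fixture returns the first 100 rows of the fraud demo dataset as (X, y).
    X, y = fraud_100
    assert len(X.to_dataframe()) == 100
    assert len(y.to_series()) == 100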
@@ -7,6 +7,7 @@

from evalml.model_family.model_family import ModelFamily
from evalml.model_understanding.prediction_explanations._algorithms import (
_aggregate_shap_values,
_compute_shap_values,
_create_dictionary,
_normalize_shap_values
@@ -149,6 +150,12 @@ def test_normalize_values_exceptions():
_normalize_shap_values(1)


def check_equal_dicts(normalized, answer):
    assert set(normalized.keys()) == set(answer)
    for key in normalized:
        np.testing.assert_almost_equal(normalized[key], answer[key], decimal=4)


@pytest.mark.parametrize("values,answer", [({"a": [-0.5, 0, 0.5], "b": [0.1, -0.6, 0.2]},
{"a": [-0.5 / 0.6, 0, 0.5 / 0.7], "b": [0.1 / 0.6, -1.0, 0.2 / 0.7]}),
([{"a": [-0.5, 0, 0.5], "b": [0.1, -0.6, 0.2]}] * 2,
@@ -164,11 +171,6 @@ def test_normalize_values_exceptions():
])
def test_normalize_values(values, answer):

def check_equal_dicts(normalized, answer):
assert set(normalized.keys()) == set(answer)
for key in normalized:
np.testing.assert_almost_equal(normalized[key], answer[key], decimal=4)

normalized = _normalize_shap_values(values)
if isinstance(normalized, dict):
check_equal_dicts(normalized, answer)
@@ -177,3 +179,22 @@ def check_equal_dicts(normalized, answer):
assert len(normalized) == len(answer)
for values, correct in zip(normalized, answer):
check_equal_dicts(values, correct)


@pytest.mark.parametrize("values,provenance,answer", [({"a_0": [-0.5, 0, 0.5], "a_1": [1, 1, 2], "b": [0.1, -0.6, 0.2]},
{"a": ["a_0", "a_1"]},
{"a": [0.5, 1, 2.5], "b": [0.1, -0.6, 0.2]}),
([{"a_0": [0.5, 1.0, 2.0], "a_1": [1.2, 1.5, 0.6], "b": [0.5, 0.2, 0.5]},
{"a_0": [-0.5, 0, 0.5], "a_1": [1, 1, 2], "b": [0.1, -0.6, 0.2]}],
{"a": ["a_0", "a_1"], "c": ["c_1", "c_2"]},
[{"a": [1.7, 2.5, 2.6], "b": [0.5, 0.2, 0.5]},
{"a": [0.5, 1, 2.5], "b": [0.1, -0.6, 0.2]}])])
def test_aggregate_values(values, provenance, answer):
    aggregated = _aggregate_shap_values(values, provenance)

    if isinstance(aggregated, dict):
        check_equal_dicts(aggregated, answer)
    else:
        assert len(aggregated) == len(answer)
        for values, correct in zip(aggregated, answer):
            check_equal_dicts(values, correct)
