Skip to content

Commit

Permalink
flatten coef_ in FI calculation for linear regression (#326)
Browse files Browse the repository at this point in the history
closes #317
  • Loading branch information
noamzbr committed Dec 27, 2021
1 parent b7f9ccd commit 4fbd46c
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 2 deletions.
2 changes: 1 addition & 1 deletion deepchecks/utils/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _built_in_importance(model: t.Any, dataset: 'base.Dataset') -> t.Optional[pd
normalized_feature_importance_values = model.feature_importances_/model.feature_importances_.sum()
return pd.Series(normalized_feature_importance_values, index=dataset.features)
elif 'coef_' in dir(model): # Linear models
coef = np.abs(model.coef_)
coef = np.abs(model.coef_.flatten())
coef = coef / coef.sum()
return pd.Series(coef, index=dataset.features)
else:
Expand Down
17 changes: 16 additions & 1 deletion tests/base/feature_importance_utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,13 @@
import pandas as pd
from hamcrest import equal_to, assert_that, calling, raises, close_to, not_none, none
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.neural_network import MLPClassifier

from deepchecks.utils.features import calculate_feature_importance, calculate_feature_importance_or_null, \
column_importance_sorter_df, column_importance_sorter_dict
from deepchecks.errors import DeepchecksValueError
from deepchecks.base import Dataset


def test_adaboost(iris_split_dataset_and_model):
Expand All @@ -43,6 +44,20 @@ def test_linear_regression(diabetes):
assert_that(feature_importances.sum(), close_to(1, 0.000001))


def test_logistic_regression():
    """Feature importances computed for a fitted LogisticRegression sum to 1.

    Regression test for the coef_-flattening fix: LogisticRegression exposes a
    2-D ``coef_`` array, which the importance calculation must flatten before
    normalizing.
    """
    # Tiny deterministic two-feature dataset; values are arbitrary.
    features = pd.DataFrame(
        {'age': [23, 19, 15, 5], 'smoking': [True, False, False, True]},
        index=[0, 1, 2, 3],
    )
    labels = pd.Series([1, 1, 0, 0])

    model = LogisticRegression()
    model.fit(features, labels)

    dataset = Dataset(df=features, label=labels)
    importances = calculate_feature_importance(model, dataset)

    # The importances are normalized, so they must sum to (approximately) 1.
    assert_that(importances.sum(), close_to(1, 0.000001))


def test_calculate_importance(iris_labeled_dataset):
clf = MLPClassifier(hidden_layer_sizes=(10,), random_state=42)
clf.fit(iris_labeled_dataset.features_columns, iris_labeled_dataset.label_col)
Expand Down

0 comments on commit 4fbd46c

Please sign in to comment.