-
Notifications
You must be signed in to change notification settings - Fork 16
/
linear_model.py
305 lines (250 loc) · 10.5 KB
/
linear_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
"""
.. versionadded:: 0.0.2
The :mod:`fatf.transparency.sklearn.linear_model` module implements linear
scikit-learn model explainers.
"""
# Author: Kacper Sokol <k.sokol@bristol.ac.uk>
# License: new BSD
from typing import Union

import sklearn.base
import sklearn.linear_model
import sklearn.svm
import sklearn.utils.validation

import numpy as np

import fatf.transparency.sklearn.tools as ftst
import fatf.utils.tools as fut
__all__ = ['linear_classifier_coefficients', 'SKLearnLinearModelExplainer']


def _leading_int(token: str) -> int:
    """
    Converts the leading run of digits of a version component to an integer.

    Pre-release components such as ``'22rc1'`` are reduced to their numeric
    prefix (``22``) instead of failing the integer conversion.

    Parameters
    ----------
    token : string
        A single dot-separated component of a version string.

    Raises
    ------
    ValueError
        The ``token`` does not start with a digit.

    Returns
    -------
    leading_int : integer
        The integer encoded by the leading digits of the ``token``.
    """
    digits_count = len(token) - len(token.lstrip('0123456789'))
    # ``int('')`` raises ``ValueError``, i.e., the same exception type that a
    # plain ``int(token)`` raises for a component without a numeric prefix.
    return int(token[:digits_count])


# First two (major and minor) components of the installed scikit-learn version.
_SKLEARN_VERSION = [
    _leading_int(i) for i in sklearn.__version__.split('.')[:2]
]
# Presumably ``True`` when the installed scikit-learn is at least 0.22 --
# the exact comparison semantics are defined by ``fut.at_least_verion``.
_SKLEARN_0_22 = fut.at_least_verion([0, 22], _SKLEARN_VERSION)

# The ``sklearn.linear_model`` sub-modules are reached through
# underscore-prefixed names when ``_SKLEARN_0_22`` is set and through their
# unprefixed names otherwise.
# NOTE: the ``_lienar_stochastic_gradient`` spelling is kept unchanged since it
# is a module-level name.
if _SKLEARN_0_22:  # pragma: nocover
    # pylint: disable=invalid-name,protected-access,no-member
    _linear_base = sklearn.linear_model._base
    _linear_coordinate_descent = sklearn.linear_model._coordinate_descent
    _lienar_stochastic_gradient = sklearn.linear_model._stochastic_gradient
    _linear_bayes = sklearn.linear_model._bayes
    _linear_theil = sklearn.linear_model._theil_sen
    _linear_omp = sklearn.linear_model._omp
    _linear_ridge = sklearn.linear_model._ridge
    _linear_angles = sklearn.linear_model._least_angle
else:  # pragma: nocover
    _linear_base = sklearn.linear_model.base  # pylint: disable=invalid-name
    # pylint: disable=invalid-name
    _linear_coordinate_descent = sklearn.linear_model.coordinate_descent
    _lienar_stochastic_gradient = sklearn.linear_model.stochastic_gradient
    _linear_bayes = sklearn.linear_model.bayes
    _linear_theil = sklearn.linear_model.theil_sen
    _linear_omp = sklearn.linear_model.omp
    _linear_ridge = sklearn.linear_model.ridge
    _linear_angles = sklearn.linear_model.least_angle

# Generic linear base classes (not used by the checks defined in this module).
_LINEAR = (_linear_base.LinearModel, _linear_coordinate_descent.LinearModelCV)
# Classes whose instances are treated as linear classifiers.
_LINEAR_CLASSIFIER = (_linear_base.LinearClassifierMixin, )
# Classes whose instances are treated as linear regressors.
_LINEAR_REGRESSOR = (
    _linear_base.LinearRegression,
    _lienar_stochastic_gradient.BaseSGDRegressor,
    _linear_bayes.BayesianRidge,
    _linear_bayes.ARDRegression,
    _linear_coordinate_descent.ElasticNet,
    _linear_coordinate_descent.ElasticNetCV,
    _linear_coordinate_descent.LassoCV,
    _linear_theil.TheilSenRegressor,
    _linear_omp.OrthogonalMatchingPursuit,
    _linear_omp.OrthogonalMatchingPursuitCV,
    _linear_ridge.Ridge,
    _linear_ridge.RidgeCV,
    sklearn.linear_model.HuberRegressor,
    _linear_angles.Lars,
    sklearn.svm.LinearSVR,
    # Multi-task cross-validated models.
    _linear_coordinate_descent.MultiTaskLassoCV,
    _linear_coordinate_descent.MultiTaskElasticNetCV)
def _is_scikit_linear(clf: sklearn.base.BaseEstimator) -> bool:
    """
    Tells whether a scikit-learn predictor is one of the known linear models.

    A predictor is considered linear when it is an instance of any class
    listed in the module-level ``_LINEAR_REGRESSOR`` or ``_LINEAR_CLASSIFIER``
    tuples.

    Parameters
    ----------
    clf : sklearn.base.BaseEstimator
        A scikit-learn predictor.

    Returns
    -------
    is_scikit_linear : boolean
        ``True`` if the predictor is an instance of one of the listed linear
        model classes, ``False`` otherwise.
    """
    assert ftst.is_sklearn_model_instance(clf), 'Invalid sklearn predictor.'

    # Both constants are tuples of classes, so their concatenation is a flat
    # tuple accepted by ``isinstance``.
    return isinstance(clf, _LINEAR_REGRESSOR + _LINEAR_CLASSIFIER)
def _is_fitted_linear(clf: sklearn.base.BaseEstimator) -> bool:
    """
    Verifies that a scikit-learn linear model has been fitted.

    The verification is delegated to
    ``sklearn.utils.validation.check_is_fitted``. When the ``_SKLEARN_0_22``
    flag is set the function is called without attribute names; otherwise it
    is asked to check for the ``coef_`` attribute.

    Parameters
    ----------
    clf : sklearn.base.BaseEstimator
        A linear scikit-learn model.

    Raises
    ------
    sklearn.exceptions.NotFittedError
        The scikit-learn package will raise this exception if the model is not
        fitted.

    Returns
    -------
    is_fitted_linear : boolean
        ``True`` if the check has passed; a failed check surfaces as the
        exception raised by scikit-learn rather than a ``False`` value.
    """
    assert _is_scikit_linear(clf), 'Has to be an linear scikit-learn model.'

    validate_fitted = sklearn.utils.validation.check_is_fitted
    if not _SKLEARN_0_22:  # pragma: nocover
        validate_fitted(clf, 'coef_', all_or_any=all)
    else:  # pragma: nocover
        # pylint: disable=no-value-for-parameter
        validate_fitted(clf)

    # Reaching this point means that the validation did not raise.
    return True
def linear_classifier_coefficients(
        clf: sklearn.base.BaseEstimator) -> np.ndarray:
    """
    Extracts coefficients (feature importances) of a linear scikit-learn model.

    .. versionadded:: 0.0.2

    .. note::
       Please note that for the coefficients (feature importances) to be
       comparable the values of all features had to be normalised to the same
       range before training the model.

    Parameters
    ----------
    clf : sklearn.base.BaseEstimator
        A linear scikit-learn model.

    Raises
    ------
    sklearn.exceptions.NotFittedError
        The scikit-learn package (``sklearn.utils.validation.check_is_fitted``
        function) will raise this exception if the model is not fitted.
    TypeError
        The ``clf`` classifier is not a scikit-learn linear model.

    Returns
    -------
    coefficients : numpy.ndarray
        A numpy array that holds coefficients of the ``clf`` linear model.
        (The order of the coefficients corresponds to the order of the features
        in the training data array).
    """
    # Has to be a linear sklearn model
    if not _is_scikit_linear(clf):
        raise TypeError('This functionality is designated for linear-like '
                        'scikit-learn predictor instances only. Instead got: '
                        '{}.{}.'.format(clf.__module__,
                                        clf.__class__.__name__))

    # The fitted check is called outside of an ``assert`` statement so that it
    # is still executed (and can raise) when Python runs with the ``-O`` flag,
    # which strips assertions.
    _is_fitted_linear(clf)

    assert hasattr(clf, 'coef_'), 'coef_ attribute missing.'
    coefficients = clf.coef_

    return coefficients
class SKLearnLinearModelExplainer(ftst.SKLearnExplainer):
    """
    A scikit-learn linear model explainer class.

    .. versionadded:: 0.0.2

    This class implements a ``feature_importance`` method that returns
    coefficients of the linear ``clf`` model. These coefficients can be
    interpreted as features (positive or negative) importance.

    .. note::
       Please note that for the coefficients (feature importances) to be
       comparable the values of all features had to be normalised to the same
       range before training the model.

    For other functionality, parameters, attributes, logs, warnings and errors
    implemented by this class please see its parent class:
    :class:`fatf.transparency.sklearn.tools.SKLearnExplainer`.
    """

    # pylint: disable=abstract-method

    def feature_importance(self) -> np.ndarray:
        """
        Extracts features importance from the ``clf`` predictor.

        Returns
        -------
        feature_importance_array : numpy.ndarray
            A numpy array with coefficients of the ``clf`` linear model.
            (The order of the coefficients corresponds to the order of the
            features in the training data array.)
        """
        feature_importance_array = linear_classifier_coefficients(self.clf)
        return feature_importance_array

    def _is_classifier(self) -> bool:
        """
        Decides whether the linear ``clf`` model is a classifier or regressor.

        Raises
        ------
        AssertionError
            The ``clf`` model is an instance of neither the
            ``_LINEAR_CLASSIFIER`` nor the ``_LINEAR_REGRESSOR`` classes.

        Returns
        -------
        is_classifier : boolean
            ``True`` if the linear ``clf`` model is a classifier and ``False``
            if it is a regressor.
        """
        if isinstance(self.clf, _LINEAR_CLASSIFIER):
            is_classifier = True
        elif isinstance(self.clf, _LINEAR_REGRESSOR):
            is_classifier = False
        else:
            # Raised explicitly (instead of ``assert False``) so that this
            # branch cannot fall through to the ``return`` with an unbound
            # variable when assertions are stripped (``python -O``).
            raise AssertionError(  # pragma: no cover
                'Not a linear predictive model?')
        return is_classifier

    def _validate_kind_fitted(self) -> bool:
        """
        Validates that the ``clf`` model is *linear* and *fitted*.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            The scikit-learn package
            (``sklearn.utils.validation.check_is_fitted`` function) will raise
            this exception if the model is not fitted.
        TypeError
            The ``clf`` classifier is not a scikit-learn linear model.

        Returns
        -------
        is_linear_fitted : boolean
            ``True`` if the ``clf`` model is linear and fitted. A model that
            fails either check is reported via one of the exceptions listed
            above.
        """
        if not _is_scikit_linear(self.clf):
            raise TypeError('This functionality is designated for linear-like '
                            'scikit-learn predictor instances only. Instead '
                            'got: {}.{}.'.format(self.clf.__module__,
                                                 self.clf.__class__.__name__))

        # The fitted check is called outside of an ``assert`` statement so
        # that it is still executed when Python runs with the ``-O`` flag;
        # otherwise an unfitted model would be reported as valid.
        _is_fitted_linear(self.clf)

        is_linear_fitted = True
        return is_linear_fitted

    def _get_features_number(self) -> int:
        """
        Extracts the number of features expected by the ``clf`` model.

        The number is read from the shape of the ``coef_`` attribute of the
        ``clf`` model. The ``is_classifier`` attribute of this object is
        presumably set by the parent class -- it is not assigned in this class.

        Raises
        ------
        AssertionError
            The ``clf`` model is a regressor and its ``coef_`` array is
            neither 1- nor 2-dimensional.

        Returns
        -------
        features_number : integer
            The number of features that the ``clf`` model is expecting (was
            trained on).
        """
        if self.is_classifier:
            features_number = self.clf.coef_.shape[1]
        else:
            coef_shape = self.clf.coef_.shape
            coef_shape_dim = len(coef_shape)
            if coef_shape_dim == 1:  # Single-task regression
                features_number = coef_shape[0]
            elif coef_shape_dim == 2:  # Multi-task regression
                features_number = coef_shape[1]
            else:
                # Raised explicitly so that the branch is not stripped (and
                # ``features_number`` left unbound) under ``python -O``.
                raise AssertionError(  # pragma: nocover
                    'Incompatible _coef shape.')
        return features_number

    def _get_classes_array(self) -> Union[np.ndarray, None]:
        """
        Extracts the unique class id's that the ``clf`` model can output.

        Returns
        -------
        classes_array : Union[None, numpy.ndarray]
            ``None`` if the ``clf`` is a regressor. Otherwise, the ``classes_``
            attribute of the ``clf`` model, i.e., an array with the unique
            class id's (unique elements of the target, i.e. ground truth,
            array used to fit the ``clf`` model).
        """
        if self.is_classifier:
            classes_array = self.clf.classes_
        else:
            classes_array = None
        return classes_array