forked from TeamHG-Memex/eli5
-
Notifications
You must be signed in to change notification settings - Fork 2
/
base.py
230 lines (201 loc) · 7.63 KB
/
base.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
# -*- coding: utf-8 -*-
from typing import Any, List, Tuple, Union
from .base_utils import attrs
from .formatters.features import FormattedFeatureName
# @attrs decorator used in this file calls @attr.s(slots=True),
# creating attr.ib entries based on the signature of __init__.
@attrs
class Explanation(object):
    """ Result of explaining a classifier or a regressor: it describes
    either the estimator's weights or one individual prediction.

    Every constructor argument is stored unchanged on the attribute of the
    same name. Only :estimator: is required; :is_regression: defaults to
    False and every other argument defaults to None.
    """
    def __init__(self,
                 estimator,  # type: str
                 description=None,  # type: str
                 error=None,  # type: str
                 method=None,  # type: str
                 is_regression=False,  # type: bool
                 targets=None,  # type: List[TargetExplanation]
                 feature_importances=None,  # type: FeatureImportances
                 decision_tree=None,  # type: TreeInfo
                 highlight_spaces=None,
                 transition_features=None,  # type: TransitionFeatureWeights
                 ):
        # type: (...) -> None
        # Explanation payloads; each one is optional.
        self.targets = targets
        self.feature_importances = feature_importances
        self.decision_tree = decision_tree
        self.transition_features = transition_features
        # Descriptive fields and flags.
        self.estimator = estimator
        self.method = method
        self.description = description
        self.error = error
        self.is_regression = is_regression
        self.highlight_spaces = highlight_spaces

    def _repr_html_(self):
        """ Render this explanation as HTML, for display in the notebook.
        """
        # The formatters are imported at call time, not at module level
        # (presumably to avoid a circular import - confirm).
        from eli5.formatters import fields
        from eli5.formatters.html import format_as_html

        html = format_as_html(self, force_weights=False, show=fields.WEIGHTS)
        return html
@attrs
class FeatureImportances(object):
    """ Feature importances, kept in :importances:, along with the number
    of remaining non-zero features, kept in :remaining:.
    """
    def __init__(self, importances, remaining):
        # type: (...) -> None
        self.importances = importances  # type: List[FeatureWeight]
        self.remaining = remaining  # type: int

    @classmethod
    def from_names_values(cls, names, values, std=None, **kwargs):
        """ Alternative constructor working from parallel sequences.

        One FeatureWeight is created per (name, value) pair, or per
        (name, value, std) triple when :std: is given. Extra keyword
        arguments are passed on to the regular constructor.
        """
        if std is None:
            rows = zip(names, values)
        else:
            rows = zip(names, values, std)
        importances = [FeatureWeight(*row) for row in rows]  # type: ignore
        return cls(importances, **kwargs)
@attrs
class TargetExplanation(object):
    """ Explanation restricted to one target or class.

    The weights of the features live in the :feature_weights: attribute;
    the features highlighted in text live in the :weighted_spans:
    attribute. :proba:, :score: and :weighted_spans: default to None.
    """
    def __init__(self,
                 target,  # type: str
                 feature_weights,  # type: FeatureWeights
                 proba=None,  # type: float
                 score=None,  # type: float
                 weighted_spans=None,  # type: WeightedSpans
                 ):
        # type: (...) -> None
        # Required arguments.
        self.target = target
        self.feature_weights = feature_weights
        # Optional arguments.
        self.weighted_spans = weighted_spans
        self.score = score
        self.proba = proba
# Type of a feature identifier.
# The List member is currently used for unhashed features.
Feature = Union[
    str,
    List,
    FormattedFeatureName,
]
@attrs
class FeatureWeights(object):
    """ Top feature weights split by sign: :pos: holds the positive ones
    and :neg: the negative ones, each sorted by descending absolute value.

    How many positive and negative features remain is recorded in the
    :pos_remaining: and :neg_remaining: attributes (both default to 0).
    """
    def __init__(self,
                 pos,  # type: List[FeatureWeight]
                 neg,  # type: List[FeatureWeight]
                 pos_remaining=0,  # type: int
                 neg_remaining=0,  # type: int
                 ):
        # type: (...) -> None
        # Positive side.
        self.pos = pos
        self.pos_remaining = pos_remaining
        # Negative side.
        self.neg = neg
        self.neg_remaining = neg_remaining
@attrs
class FeatureWeight(object):
    """ A single :feature: paired with its :weight:.

    :std: and :value: are optional and default to None.
    """
    def __init__(self,
                 feature,  # type: Feature
                 weight,  # type: float
                 std=None,  # type: float
                 value=None,  # type: Any
                 ):
        # type: (...) -> None
        # Required pair.
        self.feature = feature
        self.weight = weight
        # Optional extras.
        self.value = value
        self.std = std
@attrs
class WeightedSpans(object):
    """ Highlighted spans for parts of a document.

    :docs_weighted_spans: holds one DocWeightedSpans object per
    vectorizer; :other: holds the features that are not highlighted
    anywhere and defaults to None.
    """
    def __init__(self,
                 docs_weighted_spans,  # type: List[DocWeightedSpans]
                 other=None,  # type: FeatureWeights
                 ):
        # type: (...) -> None
        self.other = other
        self.docs_weighted_spans = docs_weighted_spans
# One highlighted feature: the feature, its spans in the text, its weight.
WeightedSpan = Tuple[
    Feature,
    List[Tuple[int, int]],  # spans of this feature, each a (start, end) pair
    float,  # weight of this feature
]
@attrs
class DocWeightedSpans(object):
    """ Features highlighted in text. :document: is a pre-processed document
    before applying the analyzer. :spans: holds a list of spans
    for features found in text (span indices correspond to
    :document:). :preserve_density: determines how features are colored
    when doing formatting - it is better set to True for char features
    and to False for word features. :vec_name: is an optional name
    (presumably of the vectorizer these spans belong to - confirm).
    Both :preserve_density: and :vec_name: default to None.
    """
    def __init__(self,
                 document,  # type: str
                 spans,  # type: List[WeightedSpan]
                 preserve_density=None,  # type: bool
                 vec_name=None,  # type: str
                 ):
        # type: (...) -> None
        self.document = document
        # The attribute is named "spans", matching the constructor argument.
        self.spans = spans
        self.preserve_density = preserve_density
        self.vec_name = vec_name
@attrs
class TransitionFeatureWeights(object):
    """ Matrix of weights for transition features.

    The weights are stored in :coef: and the class names in
    :class_names:.
    """
    def __init__(self,
                 class_names,  # type: List[str]
                 coef,
                 ):
        # type: (...) -> None
        self.coef = coef
        self.class_names = class_names
@attrs
class TreeInfo(object):
    """ Description of a decision tree.

    :criterion: names the function that measures the quality of a split,
    :tree: holds all nodes of the tree, :graphviz: is the tree rendered in
    graphviz .dot format, and :is_classification: is a boolean flag
    stored as given.
    """
    def __init__(self,
                 criterion,  # type: str
                 tree,  # type: NodeInfo
                 graphviz,  # type: str
                 is_classification,  # type: bool
                 ):
        # type: (...) -> None
        # The tree itself and its rendering.
        self.tree = tree
        self.graphviz = graphviz
        # Facts about the tree.
        self.criterion = criterion
        self.is_classification = is_classification
@attrs
class NodeInfo(object):
    """ One node of a binary tree.

    The left and right children are referenced by the :left: and :right:
    attributes. These two, like :feature_name:, :feature_id: and
    :threshold:, default to None.
    """
    def __init__(self,
                 id,  # type: int
                 is_leaf,  # type: bool
                 value,
                 value_ratio,
                 impurity,  # type: float
                 samples,  # type: int
                 sample_ratio,  # type: float
                 feature_name=None,  # type: str
                 feature_id=None,  # type: int
                 threshold=None,  # type: float
                 left=None,  # type: NodeInfo
                 right=None,  # type: NodeInfo
                 ):
        # type: (...) -> None
        # Required arguments.
        self.id = id
        self.is_leaf = is_leaf
        self.value = value
        self.value_ratio = value_ratio
        self.impurity = impurity
        self.samples = samples
        self.sample_ratio = sample_ratio
        # Optional arguments.
        self.feature_id = feature_id
        self.feature_name = feature_name
        self.threshold = threshold
        # Child nodes.
        self.left = left
        self.right = right