from numpy import mean, std
from sklearn.metrics import f1_score, mean_absolute_error

from ..utils.validation_split import kfold
from ..utils.best_model import best_model, activate_model

class Evaluate:

    '''Class for evaluating models based on the Scan() object.'''

    def __init__(self, scan_object):

        '''Takes in as input a Scan() object.
        e = Evaluate(scan_object) and see the docstring
        for e.evaluate() for more information.'''

        self.scan_object = scan_object
        self.data = scan_object.data

    def evaluate(self, x, y,
                 model_id=None,
                 folds=5,
                 shuffle=True,
                 metric='val_acc',
                 mode='multi_label',
                 asc=False,
                 print_out=False):
        '''Evaluate a model based on f1_score (for all modes except
        regression) or mean absolute error (for regression). Supports
        'binary', 'multi_class', 'multi_label', and 'regression'
        evaluation.

        x : array
            The input data for making predictions
        y : array
            The ground truth for x
        model_id : int
            It's possible to evaluate a specific model based on its ID.
            If None, the best model is picked based on `metric`.
        folds : int
            Number of folds to use for cross-validation
        shuffle : bool
            If True, the data is shuffled before evaluation.
        metric : string
            A column name referring to the metric that was used in the
            scan_object as a performance metric. This is used for picking
            the model to evaluate.
        mode : string
            'binary', 'multi_class', 'multi_label', or 'regression'.
        asc : bool
            False if the metric is to be maximized (e.g. accuracy or
            f1_score)
        print_out : bool
            If True, print out the mean and standard deviation of the
            fold scores.

        TODO: add possibility to input custom metrics.
        '''
        out = []

        # pick the best model based on the chosen metric, unless a
        # specific model_id is provided
        if model_id is None:
            model_id = best_model(self.scan_object, metric, asc)

        model = activate_model(self.scan_object, model_id)

        # split the input data into k folds
        kx, ky = kfold(x, y, folds, shuffle)

        for i in range(folds):

            y_pred = model.predict(kx[i], verbose=0)

            if mode == 'binary':
                y_pred = y_pred >= .5
                scores = f1_score(ky[i], y_pred, average='binary')

            elif mode == 'multi_class':
                y_pred = y_pred.argmax(axis=-1)
                scores = f1_score(ky[i], y_pred, average='macro')

            elif mode == 'multi_label':
                # both predictions and one-hot ground truth are reduced
                # to class indices before scoring
                y_pred = y_pred.argmax(axis=1)
                scores = f1_score(ky[i].argmax(axis=1), y_pred,
                                  average='macro')

            elif mode == 'regression':
                scores = mean_absolute_error(ky[i], y_pred)

            out.append(scores)

        if print_out is True:
            print("mean : %.2f \n std : %.2f" % (mean(out), std(out)))

        return out
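

# A minimal usage sketch (not part of the module). It assumes a completed
# talos.Scan() run assigned to `scan_object`, plus held-out validation
# arrays `x_val` and `y_val`; all of these names are illustrative.
#
#     e = Evaluate(scan_object)
#     scores = e.evaluate(x_val, y_val,
#                         folds=5,
#                         metric='val_acc',
#                         mode='binary',
#                         print_out=True)
#
# `scores` is a list with one f1_score (or, for mode='regression',
# one mean absolute error) per fold.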